From 3df15f34511a3dc975697b957eda6e7bd7b7d179 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2021 13:11:12 +0200 Subject: [PATCH 001/440] cfg80211: honour V=1 in certificate code generation When we generate the code for built-in certificates, honour the V=1 build option to print out the script doing it. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20210827131112.dc5492458d55.Idefe4ce8f9681a5ad576d3c6e57c7bff142244de@changeid Signed-off-by: Johannes Berg --- net/wireless/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index af590ae606b6..756e7de7e33f 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -26,7 +26,7 @@ endif $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) @$(kecho) " GEN $@" - @(echo '#include "reg.h"'; \ + $(Q)(echo '#include "reg.h"'; \ echo 'const u8 shipped_regdb_certs[] = {'; \ echo | cat - $^ ; \ echo '};'; \ @@ -36,7 +36,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @$(kecho) " GEN $@" - @(set -e; \ + $(Q)(set -e; \ allf=""; \ for f in $^ ; do \ test -f $$f || continue;\ From 68ba1131d4b566dfad0b319a861552778a6cdfb8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Aug 2021 10:07:19 +0200 Subject: [PATCH 002/440] mac80211: check hostapd configuration parsing twt requests Check twt_responder in ieee80211_process_rx_twt_action routine in order to take into account the case where twt has been disabled in hostapd configuration. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/516057fe4ca73ad257e8c2762e25f4b7872957fc.1630051438.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 99ed68f7dc36..8acc743559c3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3216,10 +3216,7 @@ static bool ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ieee80211_sub_if_data *sdata = rx->sdata; - const struct ieee80211_sta_he_cap *hecap; - struct ieee80211_supported_band *sband; /* TWT actions are only supported in AP for the moment */ if (sdata->vif.type != NL80211_IFTYPE_AP) @@ -3228,14 +3225,7 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) if (!rx->local->ops->add_twt_setup) return false; - sband = rx->local->hw.wiphy->bands[status->band]; - hecap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif)); - if (!hecap) - return false; - - if (!(hecap->he_cap_elem.mac_cap_info[0] & - IEEE80211_HE_MAC_CAP0_TWT_RES)) + if (!sdata->vif.bss_conf.twt_responder) return false; if (!rx->sta) From 40f231e75a1d986a8f8705765e2c5c770370318c Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 19 Sep 2021 13:40:40 +0200 Subject: [PATCH 003/440] nl80211: prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() functions. Also, take the opportunity to refactor the memcpy() call to use the flex_array_size() helper. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Link: https://lore.kernel.org/r/20210919114040.41522-1-len.baker@gmx.com [remove unnecessary variable] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bf7cd4752547..e4787c74e80b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11767,8 +11767,9 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, if (n_thresholds) { struct cfg80211_cqm_config *cqm_config; - cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) + - n_thresholds * sizeof(s32), GFP_KERNEL); + cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, + n_thresholds), + GFP_KERNEL); if (!cqm_config) { err = -ENOMEM; goto unlock; @@ -11777,7 +11778,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, cqm_config->rssi_hyst = hysteresis; cqm_config->n_rssi_thresholds = n_thresholds; memcpy(cqm_config->rssi_thresholds, thresholds, - n_thresholds * sizeof(s32)); + flex_array_size(cqm_config, rssi_thresholds, + n_thresholds)); wdev->cqm_config = cqm_config; } @@ -15081,9 +15083,7 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) if (specs > rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; - sar_spec = kzalloc(sizeof(*sar_spec) + - specs * sizeof(struct cfg80211_sar_sub_specs), - GFP_KERNEL); + sar_spec = kzalloc(struct_size(sar_spec, sub_specs, specs), GFP_KERNEL); if (!sar_spec) return -ENOMEM; From 01f84f0ed3b431b9aa40ad0af7d6849056fd3064 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:06 +0200 Subject: [PATCH 004/440] mac80211: reduce stack usage in debugfs We put a few large buffers on the stack here, but it's easy to just allocate them on the heap, so do that. Link: https://lore.kernel.org/r/20210920154009.1387f44e7382.Ife043c169e6a44edace516fea9f8311a5ca4282a@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 8be28cfd6f64..da22725eca0f 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #include @@ -314,11 +314,17 @@ STA_OPS_RW(aql); static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[71 + IEEE80211_NUM_TIDS * 40], *p = buf; + char *buf, *p; int i; struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; + ssize_t ret; + + buf = kzalloc(71 + IEEE80211_NUM_TIDS * 40, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; rcu_read_lock(); @@ -352,7 +358,9 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, } rcu_read_unlock(); - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return ret; } static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, @@ -434,10 +442,16 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, if (_cond) \ p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ } while (0) - char buf[512], *p = buf; + char *buf, *p; int i; struct sta_info *sta = file->private_data; struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; + ssize_t ret; + + buf = kzalloc(512, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n", htc->ht_supported ? "" : "not "); @@ -504,16 +518,24 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, htc->mcs.tx_params); } - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return ret; } STA_OPS(ht_capa); static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[512], *p = buf; + char *buf, *p; struct sta_info *sta = file->private_data; struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + ssize_t ret; + + buf = kzalloc(512, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = buf; p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", vhtc->vht_supported ? "" : "not "); @@ -609,7 +631,9 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, #undef PFLAG } - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return ret; } STA_OPS(vht_capa); From a5b983c6073140b624f64e79fea6d33c3e4315a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:07 +0200 Subject: [PATCH 005/440] mac80211: mesh: clean up rx_bcn_presp API We currently pass the entire elements to the rx_bcn_presp() method, but only need mesh_config. Additionally, we use the length of the elements to calculate back the entire frame's length, but that's confusing - just pass the length of the frame instead. Link: https://lore.kernel.org/r/20210920154009.a18ed3d2da6c.I1824b773a0fbae4453e1433c184678ca14e8df45@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +++---- net/mac80211/mesh.c | 4 ++-- net/mac80211/mesh_sync.c | 26 ++++++++++++-------------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 159af6c3ffb0..d74031bb4ae6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -631,10 +631,9 @@ struct ieee80211_if_ocb { */ struct ieee802_11_elems; struct ieee80211_mesh_sync_ops { - void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, - u16 stype, - struct ieee80211_mgmt *mgmt, - struct ieee802_11_elems *elems, + void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, + struct ieee80211_mgmt *mgmt, unsigned int len, + const struct ieee80211_meshconf_ie *mesh_cfg, struct ieee80211_rx_status *rx_status); /* should be called with beacon_data under RCU read lock */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 97095b7c9c64..65e9335b3614 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1353,8 +1353,8 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, } if (ifmsh->sync_ops) - ifmsh->sync_ops->rx_bcn_presp(sdata, - stype, mgmt, &elems, rx_status); + ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, + elems.mesh_config, rx_status); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index fde93de2b80a..9e342cc2504c 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -3,6 +3,7 @@ * Copyright 2011-2012, Pavel Zubarev * Copyright 2011-2012, Marco Porsch * Copyright 2011-2012, cozybit Inc. + * Copyright (C) 2021 Intel Corporation */ #include "ieee80211_i.h" @@ -35,12 +36,12 @@ struct sync_method { /** * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT * - * @ie: information elements of a management frame from the mesh peer + * @cfg: mesh config element from the mesh peer (or %NULL) */ -static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) +static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) { - return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; + return cfg && + (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING); } void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) @@ -76,11 +77,11 @@ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) } } -static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, - u16 stype, - struct ieee80211_mgmt *mgmt, - struct ieee802_11_elems *elems, - struct ieee80211_rx_status *rx_status) +static void +mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, + struct ieee80211_mgmt *mgmt, unsigned int len, + const struct ieee80211_meshconf_ie *mesh_cfg, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -101,10 +102,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, */ if (ieee80211_have_rx_timestamp(rx_status)) t_r = ieee80211_calculate_rx_timestamp(local, rx_status, - 24 + 12 + - elems->total_len + - FCS_LEN, - 24); + len + FCS_LEN, 24); else t_r = drv_get_tsf(local, sdata); @@ -119,7 +117,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, * dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors */ - if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { + if (mesh_peer_tbtt_adjusting(mesh_cfg)) { msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; From c6e37ed498f958254b5459253199e816b6bfc52f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:08 +0200 Subject: [PATCH 006/440] mac80211: move CRC into struct ieee802_11_elems We're currently returning this value, but to prepare for returning the allocated structure, move it into there. Link: https://lore.kernel.org/r/20210920154009.479b8ebf999d.If0d4ba75ee38998dc3eeae25058aa748efcb2fc9@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 9 +++++---- net/mac80211/mlme.c | 9 +++++---- net/mac80211/util.c | 10 +++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d74031bb4ae6..6a129a08bc9b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1507,6 +1507,7 @@ struct ieee80211_csa_ie { struct ieee802_11_elems { const u8 *ie_start; size_t total_len; + u32 crc; /* pointers to IEs */ const struct ieee80211_tdls_lnkie *lnk_id; @@ -2193,10 +2194,10 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } -u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, - struct ieee802_11_elems *elems, - u64 filter, u32 crc, u8 *transmitter_bssid, - u8 *bss_bssid); +void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + struct ieee802_11_elems *elems, + u64 filter, u32 crc, u8 *transmitter_bssid, + u8 *bss_bssid); static inline void ieee802_11_parse_elems(const u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c0ea3b1aa9e1..39b3ed89e0da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4070,10 +4070,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); - ncrc = ieee802_11_parse_elems_crc(variable, - len - baselen, false, &elems, - care_about_ies, ncrc, - mgmt->bssid, bssid); + ieee802_11_parse_elems_crc(variable, + len - baselen, false, &elems, + care_about_ies, ncrc, + mgmt->bssid, bssid); + ncrc = elems.crc; if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49cb96d25169..43ccad8b24c7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1461,10 +1461,10 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } -u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, - struct ieee802_11_elems *elems, - u64 filter, u32 crc, u8 *transmitter_bssid, - u8 *bss_bssid) +void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, + struct ieee802_11_elems *elems, + u64 filter, u32 crc, u8 *transmitter_bssid, + u8 *bss_bssid) { const struct element *non_inherit = NULL; u8 *nontransmitted_profile; @@ -1516,7 +1516,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, kfree(nontransmitted_profile); - return crc; + elems->crc = crc; } void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, From 49a765d6785e99157ff5091cc37485732496864e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:09 +0200 Subject: [PATCH 007/440] mac80211: mlme: find auth challenge directly There's no need to parse all elements etc. just to find the authentication challenge - use cfg80211_find_elem() instead. This also allows us to remove WLAN_EID_CHALLENGE handling from the element parsing entirely. Link: https://lore.kernel.org/r/20210920154009.45f9b3a15722.Ice3159ffad03a007d6154cbf1fb3a8c48489e86f@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/mlme.c | 11 ++++++----- net/mac80211/util.c | 4 ---- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6a129a08bc9b..7a9e529f8366 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1517,7 +1517,6 @@ struct ieee802_11_elems { const u8 *supp_rates; const u8 *ds_params; const struct ieee80211_tim_ie *tim; - const u8 *challenge; const u8 *rsn; const u8 *rsnx; const u8 *erp_info; @@ -1571,7 +1570,6 @@ struct ieee802_11_elems { u8 ssid_len; u8 supp_rates_len; u8 tim_len; - u8 challenge_len; u8 rsn_len; u8 rsnx_len; u8 ext_supp_rates_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 39b3ed89e0da..e18bd07f6822 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2870,17 +2870,17 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; + const struct element *challenge; u8 *pos; - struct ieee802_11_elems elems; u32 tx_flags = 0; struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, }; pos = mgmt->u.auth.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems, - mgmt->bssid, auth_data->bss->bssid); - if (!elems.challenge) + challenge = cfg80211_find_elem(WLAN_EID_CHALLENGE, pos, + len - (pos - (u8 *)mgmt)); + if (!challenge) return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata, &info); @@ -2888,7 +2888,8 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, - elems.challenge - 2, elems.challenge_len + 2, + (void *)challenge, + challenge->datalen + sizeof(*challenge), auth_data->bss->bssid, auth_data->bss->bssid, auth_data->key, auth_data->key_len, auth_data->key_idx, tx_flags); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 43ccad8b24c7..dce841228297 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1112,10 +1112,6 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } else elem_parse_failed = true; break; - case WLAN_EID_CHALLENGE: - elems->challenge = pos; - elems->challenge_len = elen; - break; case WLAN_EID_VENDOR_SPECIFIC: if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { From 5d24828d05f37ad770599de00b53d5386e35aa61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:10 +0200 Subject: [PATCH 008/440] mac80211: always allocate struct ieee802_11_elems As the 802.11 spec evolves, we need to parse more and more elements. This is causing the struct to grow, and we can no longer get away with putting it on the stack. Change the API to always dynamically allocate and return an allocated pointer that must be kfree()d later. As an alternative, I contemplated a scheme whereby we'd say in the code which elements we needed, e.g. DECLARE_ELEMENT_PARSER(elems, SUPPORTED_CHANNELS, CHANNEL_SWITCH, EXT(KEY_DELIVERY)); ieee802_11_parse_elems(..., &elems, ...); and while I think this is possible and will save us a lot since most individual places only care about a small subset of the elements, it ended up being a bit more work since a lot of places do the parsing and then pass the struct to other functions, sometimes with multiple levels. Link: https://lore.kernel.org/r/20210920154009.26caff6b5998.I05ae58768e990e611aee8eca8abefd9d7bc15e05@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 11 +-- net/mac80211/ibss.c | 25 +++--- net/mac80211/ieee80211_i.h | 22 ++--- net/mac80211/mesh.c | 87 ++++++++++-------- net/mac80211/mesh_hwmp.c | 44 +++++----- net/mac80211/mesh_plink.c | 11 +-- net/mac80211/mlme.c | 176 +++++++++++++++++++++---------------- net/mac80211/scan.c | 16 ++-- net/mac80211/tdls.c | 63 +++++++------ net/mac80211/util.c | 20 +++-- 10 files changed, 273 insertions(+), 202 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index cce28e3b2232..94c65def102c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -477,7 +477,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; - struct ieee802_11_elems elems = { }; + struct ieee802_11_elems *elems = NULL; u8 dialog_token; int ies_len; @@ -495,16 +495,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ies_len = len - offsetof(struct ieee80211_mgmt, u.action.u.addba_req.variable); if (ies_len) { - ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, - ies_len, true, &elems, mgmt->bssid, NULL); - if (elems.parse_error) + elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, + ies_len, true, mgmt->bssid, NULL); + if (!elems || elems->parse_error) return; } __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, buf_size, true, false, - elems.addba_ext_ie); + elems ? elems->addba_ext_ie : NULL); + kfree(elems); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5d6ca4c3e698..66b00046f0c2 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2020 Intel Corporation + * Copyright(c) 2018-2021 Intel Corporation */ #include @@ -1589,7 +1589,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { size_t baselen; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != offsetof(typeof(mgmt->u.beacon), variable)); @@ -1602,10 +1602,14 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - false, &elems, mgmt->bssid, NULL); + elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, + len - baselen, false, + mgmt->bssid, NULL); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + if (elems) { + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems); + kfree(elems); + } } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -1614,7 +1618,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; int ies_len; rx_status = IEEE80211_SKB_RXCB(skb); @@ -1651,15 +1655,16 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (ies_len < 0) break; - ieee802_11_parse_elems( + elems = ieee802_11_parse_elems( mgmt->u.action.u.chan_switch.variable, - ies_len, true, &elems, mgmt->bssid, NULL); + ies_len, true, mgmt->bssid, NULL); - if (elems.parse_error) + if (!elems || elems->parse_error) break; ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len, - rx_status, &elems); + rx_status, elems); + kfree(elems); break; } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7a9e529f8366..c3b8590a7e90 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2192,18 +2192,18 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } -void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, - struct ieee802_11_elems *elems, - u64 filter, u32 crc, u8 *transmitter_bssid, - u8 *bss_bssid); -static inline void ieee802_11_parse_elems(const u8 *start, size_t len, - bool action, - struct ieee802_11_elems *elems, - u8 *transmitter_bssid, - u8 *bss_bssid) +struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len, + bool action, + u64 filter, u32 crc, + const u8 *transmitter_bssid, + const u8 *bss_bssid); +static inline struct ieee802_11_elems * +ieee802_11_parse_elems(const u8 *start, size_t len, bool action, + const u8 *transmitter_bssid, + const u8 *bss_bssid) { - ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0, - transmitter_bssid, bss_bssid); + return ieee802_11_parse_elems_crc(start, len, action, 0, 0, + transmitter_bssid, bss_bssid); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 65e9335b3614..a4212a333d61 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1246,7 +1246,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *presp; struct beacon_data *bcn; struct ieee80211_mgmt *hdr; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; size_t baselen; u8 *pos; @@ -1255,22 +1255,24 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - ieee802_11_parse_elems(pos, len - baselen, false, &elems, mgmt->bssid, - NULL); - - if (!elems.mesh_id) + elems = ieee802_11_parse_elems(pos, len - baselen, false, mgmt->bssid, + NULL); + if (!elems) return; + if (!elems->mesh_id) + goto free; + /* 802.11-2012 10.1.4.3.2 */ if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && !is_broadcast_ether_addr(mgmt->da)) || - elems.ssid_len != 0) - return; + elems->ssid_len != 0) + goto free; - if (elems.mesh_id_len != 0 && - (elems.mesh_id_len != ifmsh->mesh_id_len || - memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) - return; + if (elems->mesh_id_len != 0 && + (elems->mesh_id_len != ifmsh->mesh_id_len || + memcmp(elems->mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) + goto free; rcu_read_lock(); bcn = rcu_dereference(ifmsh->beacon); @@ -1294,6 +1296,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, presp); out: rcu_read_unlock(); +free: + kfree(elems); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -1304,7 +1308,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; struct ieee80211_channel *channel; size_t baselen; int freq; @@ -1319,42 +1323,47 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - false, &elems, mgmt->bssid, NULL); - - /* ignore non-mesh or secure / unsecure mismatch */ - if ((!elems.mesh_id || !elems.mesh_config) || - (elems.rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || - (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) + elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, + len - baselen, + false, mgmt->bssid, NULL); + if (!elems) return; - if (elems.ds_params) - freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); + /* ignore non-mesh or secure / unsecure mismatch */ + if ((!elems->mesh_id || !elems->mesh_config) || + (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || + (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) + goto free; + + if (elems->ds_params) + freq = ieee80211_channel_to_frequency(elems->ds_params[0], band); else freq = rx_status->freq; channel = ieee80211_get_channel(local->hw.wiphy, freq); if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) - return; + goto free; - if (mesh_matches_local(sdata, &elems)) { + if (mesh_matches_local(sdata, elems)) { mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n", sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal); if (!sdata->u.mesh.user_mpm || sdata->u.mesh.mshcfg.rssi_threshold == 0 || sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal) - mesh_neighbour_update(sdata, mgmt->sa, &elems, + mesh_neighbour_update(sdata, mgmt->sa, elems, rx_status); if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && !sdata->vif.csa_active) - ieee80211_mesh_process_chnswitch(sdata, &elems, true); + ieee80211_mesh_process_chnswitch(sdata, elems, true); } if (ifmsh->sync_ops) ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, - elems.mesh_config, rx_status); + elems->mesh_config, rx_status); +free: + kfree(elems); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) @@ -1446,7 +1455,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; u16 pre_value; bool fwd_csa = true; size_t baselen; @@ -1459,33 +1468,37 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.action.u.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); - ieee802_11_parse_elems(pos, len - baselen, true, &elems, - mgmt->bssid, NULL); - - if (!mesh_matches_local(sdata, &elems)) + elems = ieee802_11_parse_elems(pos, len - baselen, true, + mgmt->bssid, NULL); + if (!elems) return; - ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!mesh_matches_local(sdata, elems)) + goto free; + + ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; - pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); + pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value); if (ifmsh->pre_value >= pre_value) - return; + goto free; ifmsh->pre_value = pre_value; if (!sdata->vif.csa_active && - !ieee80211_mesh_process_chnswitch(sdata, &elems, false)) { + !ieee80211_mesh_process_chnswitch(sdata, elems, false)) { mcsa_dbg(sdata, "Failed to process CSA action frame"); - return; + goto free; } /* forward or re-broadcast the CSA frame */ if (fwd_csa) { - if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0) + if (mesh_fwd_csa_frame(sdata, mgmt, len, elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } +free: + kfree(elems); } static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index a05b615deb51..44a6fdb6efbd 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2021 Intel Corporation * Author: Luis Carlos Cobo */ @@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; size_t baselen; u32 path_metric; struct sta_info *sta; @@ -926,37 +926,41 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; - ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, - len - baselen, false, &elems, mgmt->bssid, NULL); + elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, + len - baselen, false, mgmt->bssid, NULL); + if (!elems) + return; - if (elems.preq) { - if (elems.preq_len != 37) + if (elems->preq) { + if (elems->preq_len != 37) /* Right now we support just 1 destination and no AE */ - return; - path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq, + goto free; + path_metric = hwmp_route_info_get(sdata, mgmt, elems->preq, MPATH_PREQ); if (path_metric) - hwmp_preq_frame_process(sdata, mgmt, elems.preq, + hwmp_preq_frame_process(sdata, mgmt, elems->preq, path_metric); } - if (elems.prep) { - if (elems.prep_len != 31) + if (elems->prep) { + if (elems->prep_len != 31) /* Right now we support no AE */ - return; - path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep, + goto free; + path_metric = hwmp_route_info_get(sdata, mgmt, elems->prep, MPATH_PREP); if (path_metric) - hwmp_prep_frame_process(sdata, mgmt, elems.prep, + hwmp_prep_frame_process(sdata, mgmt, elems->prep, path_metric); } - if (elems.perr) { - if (elems.perr_len != 15) + if (elems->perr) { + if (elems->perr_len != 15) /* Right now we support only one destination per PERR */ - return; - hwmp_perr_frame_process(sdata, mgmt, elems.perr); + goto free; + hwmp_perr_frame_process(sdata, mgmt, elems->perr); } - if (elems.rann) - hwmp_rann_frame_process(sdata, mgmt, elems.rann); + if (elems->rann) + hwmp_rann_frame_process(sdata, mgmt, elems->rann); +free: + kfree(elems); } /** diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a6915847d78a..a829470dd59e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2021 Intel Corporation * Author: Luis Carlos Cobo */ #include @@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; size_t baselen; u8 *baseaddr; @@ -1228,7 +1228,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; } - ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems, - mgmt->bssid, NULL); - mesh_process_plink_frame(sdata, mgmt, &elems, rx_status); + elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, + mgmt->bssid, NULL); + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e18bd07f6822..e80f3388b0c5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3291,8 +3291,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, aid = 0; /* TODO */ } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); - ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, elems, - mgmt->bssid, assoc_data->bss->bssid); + elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, + mgmt->bssid, assoc_data->bss->bssid); + + if (!elems) + return false; if (elems->aid_resp) aid = le16_to_cpu(elems->aid_resp->aid); @@ -3314,7 +3317,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!is_s1g && !elems->supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); - return false; + ret = false; + goto out; } sdata->vif.bss_conf.aid = aid; @@ -3336,7 +3340,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && (!elems->vht_cap_elem || !elems->vht_operation)))) { const struct cfg80211_bss_ies *ies; - struct ieee802_11_elems bss_elems; + struct ieee802_11_elems *bss_elems; rcu_read_lock(); ies = rcu_dereference(cbss->ies); @@ -3347,13 +3351,17 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!bss_ies) return false; - ieee802_11_parse_elems(bss_ies->data, bss_ies->len, - false, &bss_elems, - mgmt->bssid, - assoc_data->bss->bssid); + bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len, + false, mgmt->bssid, + assoc_data->bss->bssid); + if (!bss_elems) { + ret = false; + goto out; + } + if (assoc_data->wmm && - !elems->wmm_param && bss_elems.wmm_param) { - elems->wmm_param = bss_elems.wmm_param; + !elems->wmm_param && bss_elems->wmm_param) { + elems->wmm_param = bss_elems->wmm_param; sdata_info(sdata, "AP bug: WMM param missing from AssocResp\n"); } @@ -3362,30 +3370,32 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, * Also check if we requested HT/VHT, otherwise the AP doesn't * have to include the IEs in the (re)association response. */ - if (!elems->ht_cap_elem && bss_elems.ht_cap_elem && + if (!elems->ht_cap_elem && bss_elems->ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - elems->ht_cap_elem = bss_elems.ht_cap_elem; + elems->ht_cap_elem = bss_elems->ht_cap_elem; sdata_info(sdata, "AP bug: HT capability missing from AssocResp\n"); } - if (!elems->ht_operation && bss_elems.ht_operation && + if (!elems->ht_operation && bss_elems->ht_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - elems->ht_operation = bss_elems.ht_operation; + elems->ht_operation = bss_elems->ht_operation; sdata_info(sdata, "AP bug: HT operation missing from AssocResp\n"); } - if (!elems->vht_cap_elem && bss_elems.vht_cap_elem && + if (!elems->vht_cap_elem && bss_elems->vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { - elems->vht_cap_elem = bss_elems.vht_cap_elem; + elems->vht_cap_elem = bss_elems->vht_cap_elem; sdata_info(sdata, "AP bug: VHT capa missing from AssocResp\n"); } - if (!elems->vht_operation && bss_elems.vht_operation && + if (!elems->vht_operation && bss_elems->vht_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { - elems->vht_operation = bss_elems.vht_operation; + elems->vht_operation = bss_elems->vht_operation; sdata_info(sdata, "AP bug: VHT operation missing from AssocResp\n"); } + + kfree(bss_elems); } /* @@ -3630,6 +3640,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ret = true; out: + kfree(elems); kfree(bss_ies); return ret; } @@ -3641,7 +3652,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; u16 capab_info, status_code, aid; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; int ac, uapsd_queues = -1; u8 *pos; bool reassoc; @@ -3698,14 +3709,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0) return; - ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems, - mgmt->bssid, assoc_data->bss->bssid); + elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, + mgmt->bssid, assoc_data->bss->bssid); + if (!elems) + goto notify_driver; if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems.timeout_int && - elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { + elems->timeout_int && + elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; - tu = le32_to_cpu(elems.timeout_int->value); + tu = le32_to_cpu(elems->timeout_int->value); ms = tu * 1024 / 1000; sdata_info(sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", @@ -3725,7 +3738,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { - if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, &elems)) { + if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, elems)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, cbss); @@ -3755,6 +3768,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len); notify_driver: drv_mgd_complete_tx(sdata->local, sdata, &info); + kfree(elems); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -3959,7 +3973,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; struct ieee80211_mgmt *mgmt = (void *) hdr; size_t baselen; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; @@ -4005,15 +4019,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) { - ieee802_11_parse_elems(variable, - len - baselen, false, &elems, - bssid, - ifmgd->assoc_data->bss->bssid); + elems = ieee802_11_parse_elems(variable, len - baselen, false, + bssid, + ifmgd->assoc_data->bss->bssid); + if (!elems) + return; ieee80211_rx_bss_info(sdata, mgmt, len, rx_status); - if (elems.dtim_period) - ifmgd->dtim_period = elems.dtim_period; + if (elems->dtim_period) + ifmgd->dtim_period = elems->dtim_period; ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { @@ -4021,17 +4036,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; - sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count; + sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count; } - if (elems.mbssid_config_ie) + if (elems->mbssid_config_ie) bss_conf->profile_periodicity = - elems.mbssid_config_ie->profile_periodicity; + elems->mbssid_config_ie->profile_periodicity; else bss_conf->profile_periodicity = 0; - if (elems.ext_capab_len >= 11 && - (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) + if (elems->ext_capab_len >= 11 && + (elems->ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) bss_conf->ema_ap = true; else bss_conf->ema_ap = false; @@ -4040,6 +4055,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; run_again(sdata, ifmgd->assoc_data->timeout); + kfree(elems); return; } @@ -4071,14 +4087,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); - ieee802_11_parse_elems_crc(variable, - len - baselen, false, &elems, - care_about_ies, ncrc, - mgmt->bssid, bssid); - ncrc = elems.crc; + elems = ieee802_11_parse_elems_crc(variable, len - baselen, + false, care_about_ies, ncrc, + mgmt->bssid, bssid); + if (!elems) + return; + ncrc = elems->crc; if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && - ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) { + ieee80211_check_tim(elems->tim, elems->tim_len, bss_conf->aid)) { if (local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; @@ -4148,12 +4165,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; - sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count; + sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count; } if ((ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) || ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - return; + goto free; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -4161,12 +4178,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, rx_status->device_timestamp, - &elems, true); + elems, true); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) && - ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, - elems.wmm_param_len, - elems.mu_edca_param_set)) + ieee80211_sta_wmm_params(local, sdata, elems->wmm_param, + elems->wmm_param_len, + elems->mu_edca_param_set)) changed |= BSS_CHANGED_QOS; /* @@ -4175,7 +4192,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ - bss_conf->dtim_period = elems.dtim_period ?: 1; + bss_conf->dtim_period = elems->dtim_period ?: 1; changed |= BSS_CHANGED_BEACON_INFO; ifmgd->have_beacon = true; @@ -4187,9 +4204,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps_vif(sdata); } - if (elems.erp_info) { + if (elems->erp_info) { erp_valid = true; - erp_value = elems.erp_info[0]; + erp_value = elems->erp_info[0]; } else { erp_valid = false; } @@ -4202,12 +4219,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); - changed |= ieee80211_recalc_twt_req(sdata, sta, &elems); + changed |= ieee80211_recalc_twt_req(sdata, sta, elems); - if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem, - elems.vht_cap_elem, elems.ht_operation, - elems.vht_operation, elems.he_operation, - elems.s1g_oper, bssid, &changed)) { + if (ieee80211_config_bw(sdata, sta, elems->ht_cap_elem, + elems->vht_cap_elem, elems->ht_operation, + elems->vht_operation, elems->he_operation, + elems->s1g_oper, bssid, &changed)) { mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", @@ -4219,21 +4236,23 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sizeof(deauth_buf), true, WLAN_REASON_DEAUTH_LEAVING, false); - return; + goto free; } - if (sta && elems.opmode_notif) - ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, + if (sta && elems->opmode_notif) + ieee80211_vht_handle_opmode(sdata, sta, *elems->opmode_notif, rx_status->band); mutex_unlock(&local->sta_mtx); changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, - elems.country_elem, - elems.country_elem_len, - elems.pwr_constr_elem, - elems.cisco_dtpc_elem); + elems->country_elem, + elems->country_elem_len, + elems->pwr_constr_elem, + elems->cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); +free: + kfree(elems); } void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, @@ -4262,7 +4281,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; - struct ieee802_11_elems elems; int ies_len; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -4294,6 +4312,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { + struct ieee802_11_elems *elems; + ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); @@ -4302,18 +4322,21 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; /* CSA IE cannot be overridden, no need for BSSID */ - ieee802_11_parse_elems( - mgmt->u.action.u.chan_switch.variable, - ies_len, true, &elems, mgmt->bssid, NULL); + elems = ieee802_11_parse_elems( + mgmt->u.action.u.chan_switch.variable, + ies_len, true, mgmt->bssid, NULL); - if (elems.parse_error) + if (!elems || elems->parse_error) break; ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, rx_status->device_timestamp, - &elems, false); + elems, false); + kfree(elems); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + struct ieee802_11_elems *elems; + ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.ext_chan_switch.variable); @@ -4325,21 +4348,22 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, * extended CSA IE can't be overridden, no need for * BSSID */ - ieee802_11_parse_elems( - mgmt->u.action.u.ext_chan_switch.variable, - ies_len, true, &elems, mgmt->bssid, NULL); + elems = ieee802_11_parse_elems( + mgmt->u.action.u.ext_chan_switch.variable, + ies_len, true, mgmt->bssid, NULL); - if (elems.parse_error) + if (!elems || elems->parse_error) break; /* for the handling code pretend this was also an IE */ - elems.ext_chansw_ie = + elems->ext_chansw_ie = &mgmt->u.action.u.ext_chan_switch.data; ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, rx_status->device_timestamp, - &elems, false); + elems, false); + kfree(elems); } break; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6b50cb5e0e3c..5e6b275afc9e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include @@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, }; bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; size_t baselen; u8 *elements; @@ -209,8 +209,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (baselen > len) return NULL; - ieee802_11_parse_elems(elements, len - baselen, false, &elems, - mgmt->bssid, cbss->bssid); + elems = ieee802_11_parse_elems(elements, len - baselen, false, + mgmt->bssid, cbss->bssid); + if (!elems) + return NULL; /* In case the signal is invalid update the status */ signal_valid = channel == cbss->channel; @@ -218,15 +220,17 @@ ieee80211_bss_info_update(struct ieee80211_local *local, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; bss = (void *)cbss->priv; - ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon); + ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon); list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) { non_tx_bss = (void *)non_tx_cbss->priv; - ieee80211_update_bss_from_elems(local, non_tx_bss, &elems, + ieee80211_update_bss_from_elems(local, non_tx_bss, elems, rx_status, beacon); } + kfree(elems); + return bss; } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 45e532ad1215..137be9ec94af 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2021 Intel Corporation */ #include @@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems = NULL; struct sta_info *sta; struct ieee80211_tdls_data *tf = (void *)skb->data; bool local_initiator; @@ -1718,16 +1718,20 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, goto call_drv; } - ieee802_11_parse_elems(tf->u.chan_switch_resp.variable, - skb->len - baselen, false, &elems, - NULL, NULL); - if (elems.parse_error) { + elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable, + skb->len - baselen, false, NULL, NULL); + if (!elems) { + ret = -ENOMEM; + goto out; + } + + if (elems->parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n"); ret = -EINVAL; goto out; } - if (!elems.ch_sw_timing || !elems.lnk_id) { + if (!elems->ch_sw_timing || !elems->lnk_id) { tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n"); ret = -EINVAL; goto out; @@ -1735,15 +1739,15 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = - !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); + !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; goto out; } - params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time); - params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout); + params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); + params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, ¶ms.ch_sw_tm_ie); @@ -1763,6 +1767,7 @@ call_drv: out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); + kfree(elems); return ret; } @@ -1771,7 +1776,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - struct ieee802_11_elems elems; + struct ieee802_11_elems *elems; struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; @@ -1831,22 +1836,27 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - ieee802_11_parse_elems(tf->u.chan_switch_req.variable, - skb->len - baselen, false, &elems, NULL, NULL); - if (elems.parse_error) { + elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable, + skb->len - baselen, false, NULL, NULL); + if (!elems) + return -ENOMEM; + + if (elems->parse_error) { tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n"); - return -EINVAL; + ret = -EINVAL; + goto free; } - if (!elems.ch_sw_timing || !elems.lnk_id) { + if (!elems->ch_sw_timing || !elems->lnk_id) { tdls_dbg(sdata, "TDLS channel switch req - missing IEs\n"); - return -EINVAL; + ret = -EINVAL; + goto free; } - if (!elems.sec_chan_offs) { + if (!elems->sec_chan_offs) { chan_type = NL80211_CHAN_HT20; } else { - switch (elems.sec_chan_offs->sec_chan_offs) { + switch (elems->sec_chan_offs->sec_chan_offs) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chan_type = NL80211_CHAN_HT40PLUS; break; @@ -1865,7 +1875,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef, sdata->wdev.iftype)) { tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n"); - return -EINVAL; + ret = -EINVAL; + goto free; } mutex_lock(&local->sta_mtx); @@ -1881,7 +1892,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = - !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); + !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN); if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; @@ -1889,16 +1900,16 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, } /* peer should have known better */ - if (!sta->sta.ht_cap.ht_supported && elems.sec_chan_offs && - elems.sec_chan_offs->sec_chan_offs) { + if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs && + elems->sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); ret = -ENOTSUPP; goto out; } params.chandef = &chandef; - params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time); - params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout); + params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time); + params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout); params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, @@ -1917,6 +1928,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); +free: + kfree(elems); return ret; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dce841228297..ca8008ba9b1f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1391,8 +1391,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, struct ieee802_11_elems *elems, - u8 *transmitter_bssid, - u8 *bss_bssid, + const u8 *transmitter_bssid, + const u8 *bss_bssid, u8 *nontransmitted_profile) { const struct element *elem, *sub; @@ -1457,16 +1457,20 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } -void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, - struct ieee802_11_elems *elems, - u64 filter, u32 crc, u8 *transmitter_bssid, - u8 *bss_bssid) +struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len, + bool action, u64 filter, + u32 crc, + const u8 *transmitter_bssid, + const u8 *bss_bssid) { + struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; - memset(elems, 0, sizeof(*elems)); + elems = kzalloc(sizeof(*elems), GFP_ATOMIC); + if (!elems) + return NULL; elems->ie_start = start; elems->total_len = len; @@ -1513,6 +1517,8 @@ void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, kfree(nontransmitted_profile); elems->crc = crc; + + return elems; } void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, From 9e263e193af73d2509dc3102a680a11130f44e20 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Sep 2021 16:18:37 +0200 Subject: [PATCH 009/440] nl80211: don't put struct cfg80211_ap_settings on stack This struct has grown quite a bit, so dynamically allocate it instead of putting it on the stack. Link: https://lore.kernel.org/r/20210923161836.5813d881eae3.I0fc0f83905b0bfa332c4f1505e00c13abfca3545@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 188 ++++++++++++++++++++++++----------------- 1 file changed, 109 insertions(+), 79 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e4787c74e80b..0b4f29d689d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5323,7 +5323,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_ap_settings params; + struct cfg80211_ap_settings *params; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -5336,27 +5336,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (wdev->beacon_interval) return -EALREADY; - memset(¶ms, 0, sizeof(params)); - /* these are required for START_AP */ if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || !info->attrs[NL80211_ATTR_DTIM_PERIOD] || !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon); - if (err) - return err; + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; - params.beacon_interval = + err = nl80211_parse_beacon(rdev, info->attrs, ¶ms->beacon); + if (err) + goto out; + + params->beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - params.dtim_period = + params->dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, - params.beacon_interval); + params->beacon_interval); if (err) - return err; + goto out; /* * In theory, some of these attributes should be required here @@ -5366,129 +5368,156 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) * additional information -- drivers must check! */ if (info->attrs[NL80211_ATTR_SSID]) { - params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); - params.ssid_len = + params->ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + params->ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - if (params.ssid_len == 0) - return -EINVAL; + if (params->ssid_len == 0) { + err = -EINVAL; + goto out; + } } if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) - params.hidden_ssid = nla_get_u32( + params->hidden_ssid = nla_get_u32( info->attrs[NL80211_ATTR_HIDDEN_SSID]); - params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + params->privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { - params.auth_type = nla_get_u32( + params->auth_type = nla_get_u32( info->attrs[NL80211_ATTR_AUTH_TYPE]); - if (!nl80211_valid_auth_type(rdev, params.auth_type, - NL80211_CMD_START_AP)) - return -EINVAL; + if (!nl80211_valid_auth_type(rdev, params->auth_type, + NL80211_CMD_START_AP)) { + err = -EINVAL; + goto out; + } } else - params.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + params->auth_type = NL80211_AUTHTYPE_AUTOMATIC; - err = nl80211_crypto_settings(rdev, info, ¶ms.crypto, + err = nl80211_crypto_settings(rdev, info, ¶ms->crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) - return err; + goto out; if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) - return -EOPNOTSUPP; - params.inactivity_timeout = nla_get_u16( + if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) { + err = -EOPNOTSUPP; + goto out; + } + params->inactivity_timeout = nla_get_u16( info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) - return -EINVAL; - params.p2p_ctwindow = + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + err = -EINVAL; + goto out; + } + params->p2p_ctwindow = nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); - if (params.p2p_ctwindow != 0 && - !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) - return -EINVAL; + if (params->p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) { + err = -EINVAL; + goto out; + } } if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { u8 tmp; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) - return -EINVAL; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + err = -EINVAL; + goto out; + } tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); - params.p2p_opp_ps = tmp; - if (params.p2p_opp_ps != 0 && - !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) - return -EINVAL; + params->p2p_opp_ps = tmp; + if (params->p2p_opp_ps != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) { + err = -EINVAL; + goto out; + } } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); + err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); if (err) - return err; + goto out; } else if (wdev->preset_chandef.chan) { - params.chandef = wdev->preset_chandef; - } else if (!nl80211_get_ap_channel(rdev, ¶ms)) - return -EINVAL; + params->chandef = wdev->preset_chandef; + } else if (!nl80211_get_ap_channel(rdev, params)) { + err = -EINVAL; + goto out; + } - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) - return -EINVAL; + if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, + wdev->iftype)) { + err = -EINVAL; + goto out; + } if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, - ¶ms.beacon_rate, + ¶ms->beacon_rate, dev, false); if (err) - return err; + goto out; - err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, - ¶ms.beacon_rate); + err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, + ¶ms->beacon_rate); if (err) - return err; + goto out; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { - params.smps_mode = + params->smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); - switch (params.smps_mode) { + switch (params->smps_mode) { case NL80211_SMPS_OFF: break; case NL80211_SMPS_STATIC: if (!(rdev->wiphy.features & - NL80211_FEATURE_STATIC_SMPS)) - return -EINVAL; + NL80211_FEATURE_STATIC_SMPS)) { + err = -EINVAL; + goto out; + } break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & - NL80211_FEATURE_DYNAMIC_SMPS)) - return -EINVAL; + NL80211_FEATURE_DYNAMIC_SMPS)) { + err = -EINVAL; + goto out; + } break; default: - return -EINVAL; + err = -EINVAL; + goto out; } } else { - params.smps_mode = NL80211_SMPS_OFF; + params->smps_mode = NL80211_SMPS_OFF; } - params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) - return -EOPNOTSUPP; + params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); + if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { + err = -EOPNOTSUPP; + goto out; + } if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params.acl = parse_acl_data(&rdev->wiphy, info); - if (IS_ERR(params.acl)) - return PTR_ERR(params.acl); + params->acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params->acl)) { + err = PTR_ERR(params->acl); + goto out; + } } - params.twt_responder = + params->twt_responder = nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]); if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) { err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], - ¶ms.he_obss_pd); + ¶ms->he_obss_pd); if (err) goto out; } @@ -5496,7 +5525,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { err = nl80211_parse_he_bss_color( info->attrs[NL80211_ATTR_HE_BSS_COLOR], - ¶ms.he_bss_color); + ¶ms->he_bss_color); if (err) goto out; } @@ -5504,7 +5533,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], - ¶ms); + params); if (err) goto out; } @@ -5512,24 +5541,24 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { err = nl80211_parse_unsol_bcast_probe_resp( rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], - ¶ms); + params); if (err) goto out; } - nl80211_calculate_ap_params(¶ms); + nl80211_calculate_ap_params(params); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) - params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; + params->flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; wdev_lock(wdev); - err = rdev_start_ap(rdev, dev, ¶ms); + err = rdev_start_ap(rdev, dev, params); if (!err) { - wdev->preset_chandef = params.chandef; - wdev->beacon_interval = params.beacon_interval; - wdev->chandef = params.chandef; - wdev->ssid_len = params.ssid_len; - memcpy(wdev->ssid, params.ssid, wdev->ssid_len); + wdev->preset_chandef = params->chandef; + wdev->beacon_interval = params->beacon_interval; + wdev->chandef = params->chandef; + wdev->ssid_len = params->ssid_len; + memcpy(wdev->ssid, params->ssid, wdev->ssid_len); if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->conn_owner_nlportid = info->snd_portid; @@ -5537,7 +5566,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev_unlock(wdev); out: - kfree(params.acl); + kfree(params->acl); + kfree(params); return err; } From 37123c3baaee4d6a189ad4abad804770d4a607e8 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 24 Sep 2021 06:00:51 -0400 Subject: [PATCH 010/440] mac80211: use ieee802_11_parse_elems() in ieee80211_prep_channel() In function ieee80211_prep_channel(), it has some ieee80211_bss_get_ie() and cfg80211_find_ext_ie() to get the IE, this is to use another function ieee802_11_parse_elems() to get all the IEs in one time. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20210924100052.32029-6-wgong@codeaurora.org [remove now unnecessary size validation, use -ENOMEM, free elems earlier for less error handling code] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 59 +++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e80f3388b0c5..29388204889f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4998,10 +4998,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; + struct ieee802_11_elems *elems; + const struct cfg80211_bss_ies *ies; int ret; u32 i; bool have_80mhz; + rcu_read_lock(); + + ies = rcu_dereference(cbss->ies); + elems = ieee802_11_parse_elems(ies->data, ies->len, false, + NULL, NULL); + if (!elems) { + rcu_read_unlock(); + return -ENOMEM; + } + sband = local->hw.wiphy->bands[cbss->channel->band]; ifmgd->flags &= ~(IEEE80211_STA_DISABLE_40MHZ | @@ -5024,18 +5036,9 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ieee80211_vif_type_p2p(&sdata->vif))) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; - rcu_read_lock(); - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) { - const u8 *ht_oper_ie, *ht_cap_ie; - - ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); - if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper)) - ht_oper = (void *)(ht_oper_ie + 2); - - ht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); - if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap)) - ht_cap = (void *)(ht_cap_ie + 2); + ht_oper = elems->ht_operation; + ht_cap = elems->ht_cap_elem; if (!ht_cap) { ifmgd->flags |= IEEE80211_STA_DISABLE_HT; @@ -5044,12 +5047,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) { - const u8 *vht_oper_ie, *vht_cap; - - vht_oper_ie = ieee80211_bss_get_ie(cbss, - WLAN_EID_VHT_OPERATION); - if (vht_oper_ie && vht_oper_ie[1] >= sizeof(*vht_oper)) - vht_oper = (void *)(vht_oper_ie + 2); + vht_oper = elems->vht_operation; if (vht_oper && !ht_oper) { vht_oper = NULL; sdata_info(sdata, @@ -5059,25 +5057,14 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); - if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) { + if (!elems->vht_cap_elem) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; vht_oper = NULL; } } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) { - const struct cfg80211_bss_ies *ies; - const u8 *he_oper_ie; - - ies = rcu_dereference(cbss->ies); - he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, - ies->data, ies->len); - if (he_oper_ie && - he_oper_ie[1] >= ieee80211_he_oper_size(&he_oper_ie[3])) - he_oper = (void *)(he_oper_ie + 3); - else - he_oper = NULL; + he_oper = elems->he_operation; if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; @@ -5098,13 +5085,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; if (sband->band == NL80211_BAND_S1GHZ) { - const u8 *s1g_oper_ie; - - s1g_oper_ie = ieee80211_bss_get_ie(cbss, - WLAN_EID_S1G_OPERATION); - if (s1g_oper_ie && s1g_oper_ie[1] >= sizeof(*s1g_oper)) - s1g_oper = (void *)(s1g_oper_ie + 2); - else + s1g_oper = elems->s1g_oper; + if (!s1g_oper) sdata_info(sdata, "AP missing S1G operation element?\n"); } @@ -5120,6 +5102,9 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, local->rx_chains); rcu_read_unlock(); + /* the element data was RCU protected so no longer valid anyway */ + kfree(elems); + elems = NULL; if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) { sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection"); From cd8793f97f5f7772bdad8338f239cb7d9446e88b Mon Sep 17 00:00:00 2001 From: Ramon Fontes Date: Thu, 23 Sep 2021 15:28:05 -0300 Subject: [PATCH 011/440] mac80211_hwsim: enable 6GHz channels Advertise 6 GHz channels with HE (and without HT/VHT) capabilities. Signed-off-by: Ramon Fontes Link: https://lore.kernel.org/r/20210923182805.27122-1-ramonreisfontes@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 156 +++++++++++++++++++++++--- 1 file changed, 141 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index ffa894f7312a..2d09e998d643 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2802,7 +2802,6 @@ out_err: static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = { { - /* TODO: should we support other types, e.g., P2P?*/ .types_mask = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP), .he_cap = { @@ -2850,7 +2849,6 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = { }, #ifdef CONFIG_MAC80211_MESH { - /* TODO: should we support other types, e.g., IBSS?*/ .types_mask = BIT(NL80211_IFTYPE_MESH_POINT), .he_cap = { .has_he = true, @@ -2988,6 +2986,122 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz[] = { #endif }; +static const struct ieee80211_sband_iftype_data he_capa_6ghz[] = { + { + /* TODO: should we support other types, e.g., P2P?*/ + .types_mask = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP), + .he_6ghz_capa = { + .capa = cpu_to_le16(IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START | + IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP | + IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN | + IEEE80211_HE_6GHZ_CAP_SM_PS | + IEEE80211_HE_6GHZ_CAP_RD_RESPONDER | + IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS | + IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS), + }, + .he_cap = { + .has_he = true, + .he_cap_elem = { + .mac_cap_info[0] = + IEEE80211_HE_MAC_CAP0_HTC_HE, + .mac_cap_info[1] = + IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | + IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, + .mac_cap_info[2] = + IEEE80211_HE_MAC_CAP2_BSR | + IEEE80211_HE_MAC_CAP2_MU_CASCADING | + IEEE80211_HE_MAC_CAP2_ACK_EN, + .mac_cap_info[3] = + IEEE80211_HE_MAC_CAP3_OMI_CONTROL | + IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3, + .mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU, + .phy_cap_info[0] = + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G, + .phy_cap_info[1] = + IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK | + IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A | + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD | + IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS, + .phy_cap_info[2] = + IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US | + IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ | + IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ | + IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO | + IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO, + + /* Leave all the other PHY capability bytes + * unset, as DCM, beam forming, RU and PPE + * threshold information are not supported + */ + }, + .he_mcs_nss_supp = { + .rx_mcs_80 = cpu_to_le16(0xfffa), + .tx_mcs_80 = cpu_to_le16(0xfffa), + .rx_mcs_160 = cpu_to_le16(0xfffa), + .tx_mcs_160 = cpu_to_le16(0xfffa), + .rx_mcs_80p80 = cpu_to_le16(0xfffa), + .tx_mcs_80p80 = cpu_to_le16(0xfffa), + }, + }, + }, +#ifdef CONFIG_MAC80211_MESH + { + /* TODO: should we support other types, e.g., IBSS?*/ + .types_mask = BIT(NL80211_IFTYPE_MESH_POINT), + .he_6ghz_capa = { + .capa = cpu_to_le16(IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START | + IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP | + IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN | + IEEE80211_HE_6GHZ_CAP_SM_PS | + IEEE80211_HE_6GHZ_CAP_RD_RESPONDER | + IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS | + IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS), + }, + .he_cap = { + .has_he = true, + .he_cap_elem = { + .mac_cap_info[0] = + IEEE80211_HE_MAC_CAP0_HTC_HE, + .mac_cap_info[1] = + IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, + .mac_cap_info[2] = + IEEE80211_HE_MAC_CAP2_ACK_EN, + .mac_cap_info[3] = + IEEE80211_HE_MAC_CAP3_OMI_CONTROL | + IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3, + .mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU, + .phy_cap_info[0] = + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G, + .phy_cap_info[1] = + IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK | + IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A | + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD | + IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS, + .phy_cap_info[2] = 0, + + /* Leave all the other PHY capability bytes + * unset, as DCM, beam forming, RU and PPE + * threshold information are not supported + */ + }, + .he_mcs_nss_supp = { + .rx_mcs_80 = cpu_to_le16(0xfffa), + .tx_mcs_80 = cpu_to_le16(0xfffa), + .rx_mcs_160 = cpu_to_le16(0xfffa), + .tx_mcs_160 = cpu_to_le16(0xfffa), + .rx_mcs_80p80 = cpu_to_le16(0xfffa), + .tx_mcs_80p80 = cpu_to_le16(0xfffa), + }, + }, + }, +#endif +}; + static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband) { u16 n_iftype_data; @@ -3000,6 +3114,10 @@ static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband) n_iftype_data = ARRAY_SIZE(he_capa_5ghz); sband->iftype_data = (struct ieee80211_sband_iftype_data *)he_capa_5ghz; + } else if (sband->band == NL80211_BAND_6GHZ) { + n_iftype_data = ARRAY_SIZE(he_capa_6ghz); + sband->iftype_data = + (struct ieee80211_sband_iftype_data *)he_capa_6ghz; } else { return; } @@ -3290,6 +3408,12 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sband->vht_cap.vht_mcs.tx_mcs_map = sband->vht_cap.vht_mcs.rx_mcs_map; break; + case NL80211_BAND_6GHZ: + sband->channels = data->channels_6ghz; + sband->n_channels = ARRAY_SIZE(hwsim_channels_6ghz); + sband->bitrates = data->rates + 4; + sband->n_bitrates = ARRAY_SIZE(hwsim_rates) - 4; + break; case NL80211_BAND_S1GHZ: memcpy(&sband->s1g_cap, &hwsim_s1g_cap, sizeof(sband->s1g_cap)); @@ -3300,19 +3424,21 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, continue; } - sband->ht_cap.ht_supported = true; - sband->ht_cap.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | - IEEE80211_HT_CAP_GRN_FLD | - IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40 | - IEEE80211_HT_CAP_DSSSCCK40; - sband->ht_cap.ampdu_factor = 0x3; - sband->ht_cap.ampdu_density = 0x6; - memset(&sband->ht_cap.mcs, 0, - sizeof(sband->ht_cap.mcs)); - sband->ht_cap.mcs.rx_mask[0] = 0xff; - sband->ht_cap.mcs.rx_mask[1] = 0xff; - sband->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; + if (band != NL80211_BAND_6GHZ){ + sband->ht_cap.ht_supported = true; + sband->ht_cap.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40; + sband->ht_cap.ampdu_factor = 0x3; + sband->ht_cap.ampdu_density = 0x6; + memset(&sband->ht_cap.mcs, 0, + sizeof(sband->ht_cap.mcs)); + sband->ht_cap.mcs.rx_mask[0] = 0xff; + sband->ht_cap.mcs.rx_mask[1] = 0xff; + sband->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; + } mac80211_hwsim_he_capab(sband); From e306784a8de08868d0ecbf78dd42a0051d0e14ce Mon Sep 17 00:00:00 2001 From: Subrat Mishra Date: Wed, 15 Sep 2021 11:22:23 +0530 Subject: [PATCH 012/440] cfg80211: AP mode driver offload for FILS association crypto Add a driver FILS crypto offload extended capability flag to indicate that the driver running in AP mode is capable of handling encryption and decryption of (Re)Association request and response frames. Add a command to set FILS AAD data to driver. This feature is supported on drivers running in AP mode only. This extended capability is exchanged with hostapd during cfg80211 init. If the driver indicates this capability, then before sending the Authentication response frame, hostapd sets FILS AAD data to the driver. This allows the driver to decrypt (Re)Association Request frame and encrypt (Re)Association Response frame. FILS Key derivation will still be done in hostapd. Signed-off-by: Subrat Mishra Link: https://lore.kernel.org/r/1631685143-13530-1-git-send-email-subratm@codeaurora.org [fix whitespace] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 22 +++++++++++++++++++++ include/uapi/linux/nl80211.h | 37 ++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 30 +++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 14 ++++++++++++++ net/wireless/trace.h | 31 ++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62dd8422e0dc..125f563d66a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -739,6 +739,22 @@ struct cfg80211_tid_config { struct cfg80211_tid_cfg tid_conf[]; }; +/** + * struct cfg80211_fils_aad - FILS AAD data + * @macaddr: STA MAC address + * @kek: FILS KEK + * @kek_len: FILS KEK length + * @snonce: STA Nonce + * @anonce: AP Nonce + */ +struct cfg80211_fils_aad { + const u8 *macaddr; + const u8 *kek; + u8 kek_len; + const u8 *snonce; + const u8 *anonce; +}; + /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition @@ -4018,6 +4034,10 @@ struct mgmt_frame_regs { * @set_sar_specs: Update the SAR (TX power) settings. * * @color_change: Initiate a color change. + * + * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use + * those to decrypt (Re)Association Request and encrypt (Re)Association + * Response frame. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4348,6 +4368,8 @@ struct cfg80211_ops { int (*color_change)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params); + int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_fils_aad *fils_aad); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c2efea98e060..e89bbf856228 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -300,6 +300,29 @@ * the interface goes down. */ +/** + * DOC: FILS shared key crypto offload + * + * This feature is applicable to drivers running in AP mode. + * + * FILS shared key crypto offload can be advertised by drivers by setting + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD flag. The drivers that support + * FILS shared key crypto offload should be able to encrypt and decrypt + * association frames for FILS shared key authentication as per IEEE 802.11ai. + * With this capability, for FILS key derivation, drivers depend on userspace. + * + * After FILS key derivation, userspace shares the FILS AAD details with the + * driver and the driver stores the same to use in decryption of association + * request and in encryption of association response. The below parameters + * should be given to the driver in %NL80211_CMD_SET_FILS_AAD. + * %NL80211_ATTR_MAC - STA MAC address, used for storing FILS AAD per STA + * %NL80211_ATTR_FILS_KEK - Used for encryption or decryption + * %NL80211_ATTR_FILS_NONCES - Used for encryption or decryption + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * + * Once the association is done, the driver cleans the FILS AAD data. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1200,6 +1223,12 @@ * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change * has completed * + * @NL80211_CMD_SET_FILS_AAD: Set FILS AAD data to the driver using - + * &NL80211_ATTR_MAC - for STA MAC address + * &NL80211_ATTR_FILS_KEK - for KEK + * &NL80211_ATTR_FILS_NONCES - for FILS Nonces + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1440,6 +1469,8 @@ enum nl80211_commands { NL80211_CMD_COLOR_CHANGE_ABORTED, NL80211_CMD_COLOR_CHANGE_COMPLETED, + NL80211_CMD_SET_FILS_AAD, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -5995,6 +6026,11 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision * detection and change announcemnts. * + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD: Driver running in AP mode supports + * FILS encryption and decryption for (Re)Association Request and Response + * frames. Userspace has to share FILS AAD details to the driver by using + * @NL80211_CMD_SET_FILS_AAD. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6060,6 +6096,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SECURE_RTT, NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, + NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0b4f29d689d2..d7c03f6145f0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14936,6 +14936,29 @@ out: return err; } +static int nl80211_set_fils_aad(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_fils_aad fils_aad = {}; + u8 *nonces; + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_FILS_KEK] || + !info->attrs[NL80211_ATTR_FILS_NONCES]) + return -EINVAL; + + fils_aad.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); + fils_aad.kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); + fils_aad.kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); + nonces = nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); + fils_aad.snonce = nonces; + fils_aad.anonce = nonces + FILS_NONCE_LEN; + + return rdev_set_fils_aad(rdev, dev, &fils_aad); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15937,6 +15960,13 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_FILS_AAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = nl80211_set_fils_aad, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index ce6bf218a1a3..cc1efec4b27b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1381,4 +1381,18 @@ static inline int rdev_color_change(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_fils_aad(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_fils_aad *fils_aad) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_set_fils_aad(&rdev->wiphy, dev, fils_aad); + if (rdev->ops->set_fils_aad) + ret = rdev->ops->set_fils_aad(&rdev->wiphy, dev, fils_aad); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 19b78d472283..ad6c16a06bcb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -167,6 +167,19 @@ __entry->center_freq1, __entry->freq1_offset, \ __entry->center_freq2 +#define FILS_AAD_ASSIGN(fa) \ + do { \ + if (fa) { \ + ether_addr_copy(__entry->macaddr, fa->macaddr); \ + __entry->kek_len = fa->kek_len; \ + } else { \ + eth_zero_addr(__entry->macaddr); \ + __entry->kek_len = 0; \ + } \ + } while (0) +#define FILS_AAD_PR_FMT \ + "macaddr: %pM, kek_len: %d" + #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ __field(u32, inactive_time) \ @@ -2614,6 +2627,24 @@ DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr, TP_ARGS(wiphy, wdev, cookie) ); +TRACE_EVENT(rdev_set_fils_aad, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_fils_aad *fils_aad), + TP_ARGS(wiphy, netdev, fils_aad), + TP_STRUCT__entry(WIPHY_ENTRY + NETDEV_ENTRY + __array(u8, macaddr, ETH_ALEN) + __field(u8, kek_len) + ), + TP_fast_assign(WIPHY_ASSIGN; + NETDEV_ASSIGN; + FILS_AAD_ASSIGN(fils_aad); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " FILS_AAD_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, + __entry->kek_len) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ From 7ff379ba2d4b7b205240e666601fe302207d73f8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Sep 2021 11:51:24 +0200 Subject: [PATCH 013/440] mac80211: twt: don't use potentially unaligned pointer Since we're pointing into a frame, the pointer to the twt_agrt->req_type struct member is potentially not aligned properly. Open-code le16p_replace_bits() to avoid passing an unaligned pointer. Reported-by: kernel test robot Fixes: f5a4c24e689f ("mac80211: introduce individual TWT support in AP mode") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20210927115124.e1208694f37b.Ie3de9bcc5dde5a79e3ac81f3185beafe4d214e57@changeid Signed-off-by: Johannes Berg --- net/mac80211/s1g.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 7e35ab5b6166..4141bc80cdfd 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -104,9 +104,11 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, /* broadcast TWT not supported yet */ if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) { - le16p_replace_bits(&twt_agrt->req_type, - TWT_SETUP_CMD_REJECT, - IEEE80211_TWT_REQTYPE_SETUP_CMD); + twt_agrt->req_type &= + ~cpu_to_le16(IEEE80211_TWT_REQTYPE_SETUP_CMD); + twt_agrt->req_type |= + le16_encode_bits(TWT_SETUP_CMD_REJECT, + IEEE80211_TWT_REQTYPE_SETUP_CMD); goto out; } From 405fca8a946168e71c04b82cc80727c3ea686e08 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 24 Sep 2021 06:00:47 -0400 Subject: [PATCH 014/440] ieee80211: add power type definition for 6 GHz 6 GHz regulatory domains introduces different modes for 6 GHz AP operations: Low Power Indoor (LPI), Standard Power (SP) and Very Low Power (VLP). 6 GHz STAs could be operated as either Regular or Subordinate clients. Define the flags for power type of AP and STATION mode. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20210924100052.32029-2-wgong@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 38 ++++++++++++++++++++++++++++++++++++++ include/net/mac80211.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 694264503119..420dea9aa648 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1988,6 +1988,44 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); +/** + * enum ieee80211_ap_reg_power - regulatory power for a Access Point + * + * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode + * @IEEE80211_REG_LPI: Indoor Access Point + * @IEEE80211_REG_SP: Standard power Access Point + * @IEEE80211_REG_VLP: Very low power Access Point + * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal + * @IEEE80211_REG_AP_POWER_MAX: maximum value + */ +enum ieee80211_ap_reg_power { + IEEE80211_REG_UNSET_AP, + IEEE80211_REG_LPI_AP, + IEEE80211_REG_SP_AP, + IEEE80211_REG_VLP_AP, + IEEE80211_REG_AP_POWER_AFTER_LAST, + IEEE80211_REG_AP_POWER_MAX = + IEEE80211_REG_AP_POWER_AFTER_LAST - 1, +}; + +/** + * enum ieee80211_client_reg_power - regulatory power for a client + * + * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode + * @IEEE80211_REG_DEFAULT_CLIENT: Default Client + * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client + * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal + * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value + */ +enum ieee80211_client_reg_power { + IEEE80211_REG_UNSET_CLIENT, + IEEE80211_REG_DEFAULT_CLIENT, + IEEE80211_REG_SUBORDINATE_CLIENT, + IEEE80211_REG_CLIENT_POWER_AFTER_LAST, + IEEE80211_REG_CLIENT_POWER_MAX = + IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1, +}; + /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af0fc13cea34..8923a9fc4126 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -632,6 +632,7 @@ struct ieee80211_fils_discovery { * @s1g: BSS is S1G BSS (affects Association Request format). * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. + * @power_type: power type of BSS for 6 GHz */ struct ieee80211_bss_conf { const u8 *bssid; @@ -702,6 +703,7 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; bool s1g; struct cfg80211_bitrate_mask beacon_tx_rate; + enum ieee80211_ap_reg_power power_type; }; /** From cb751b7a57e50d356ec8fc7712c245a05515e787 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 24 Sep 2021 06:00:48 -0400 Subject: [PATCH 015/440] mac80211: add parse regulatory info in 6 GHz operation information This patch is to convert the regulatory info subfield in HE operation element to power type and save in struct cfg80211_chan_def. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20210924100052.32029-3-wgong@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/util.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ca8008ba9b1f..39fa2a50385d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3385,6 +3385,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, const struct ieee80211_sta_he_cap *he_cap; struct cfg80211_chan_def he_chandef = *chandef; const struct ieee80211_he_6ghz_oper *he_6ghz_oper; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bool support_80_80, support_160; u8 he_phy_cap; u32 freq; @@ -3428,6 +3429,19 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, NL80211_BAND_6GHZ); he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); + switch (u8_get_bits(he_6ghz_oper->control, + IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + bss_conf->power_type = IEEE80211_REG_LPI_AP; + break; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + bss_conf->power_type = IEEE80211_REG_SP_AP; + break; + default: + bss_conf->power_type = IEEE80211_REG_UNSET_AP; + break; + } + switch (u8_get_bits(he_6ghz_oper->control, IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) { case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ: From 63214f02cff9ebd57be00e143de12107c66f5394 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 24 Sep 2021 06:00:52 -0400 Subject: [PATCH 016/440] mac80211: save transmit power envelope element and power constraint This is to save the transmit power envelope element and power constraint in struct ieee80211_bss_conf for 6 GHz. Lower driver will use this info to calculate the power limit. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20210924100052.32029-7-wgong@codeaurora.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/mlme.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8923a9fc4126..16a965262a4f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -633,6 +633,9 @@ struct ieee80211_fils_discovery { * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz + * @tx_pwr_env: transmit power envelope array of BSS. + * @tx_pwr_env_num: number of @tx_pwr_env. + * @pwr_reduction: power constraint of BSS. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -704,6 +707,9 @@ struct ieee80211_bss_conf { bool s1g; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; + struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; + u8 tx_pwr_env_num; + u8 pwr_reduction; }; /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29388204889f..0ec183a92a01 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2258,6 +2258,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u32 changed = 0; struct ieee80211_prep_tx_info info = { .subtype = stype, @@ -2407,6 +2408,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_delayed_work_sync(&ifmgd->tx_tspec_wk); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + + bss_conf->pwr_reduction = 0; + bss_conf->tx_pwr_env_num = 0; + memset(bss_conf->tx_pwr_env, 0, sizeof(bss_conf->tx_pwr_env)); } static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) @@ -5066,6 +5071,30 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) { he_oper = elems->he_operation; + if (is_6ghz) { + struct ieee80211_bss_conf *bss_conf; + u8 i, j = 0; + + bss_conf = &sdata->vif.bss_conf; + + if (elems->pwr_constr_elem) + bss_conf->pwr_reduction = *elems->pwr_constr_elem; + + BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != + ARRAY_SIZE(elems->tx_pwr_env)); + + for (i = 0; i < elems->tx_pwr_env_num; i++) { + if (elems->tx_pwr_env_len[i] > + sizeof(bss_conf->tx_pwr_env[j])) + continue; + + bss_conf->tx_pwr_env_num++; + memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], + elems->tx_pwr_env_len[i]); + j++; + } + } + if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } From e53e9828a8d2c6545e01ff9711f1221f2fd199ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Sep 2021 13:11:06 +0200 Subject: [PATCH 017/440] cfg80211: always free wiphy specific regdomain In the (somewhat unlikely) event that we allocate a wiphy, then add a regdomain to it, and then fail registration, we leak the regdomain. Fix this by just always freeing it at the end, in the normal cases we'll free (and NULL) it during wiphy_unregister(). This happened when the wiphy settings were bad, and since they can be controlled by userspace with hwsim, syzbot was able to find this issue. Reported-by: syzbot+1638e7c770eef6b6c0d0@syzkaller.appspotmail.com Fixes: 3e0c3ff36c4c ("cfg80211: allow multiple driver regulatory_hints()") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20210927131105.68b70cef4674.I4b9f0aa08c2af28555963b9fe3d34395bb72e0cc@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 03323121ca50..45be124a98f1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1080,6 +1080,16 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) cfg80211_put_bss(&rdev->wiphy, &scan->pub); mutex_destroy(&rdev->wiphy.mtx); + + /* + * The 'regd' can only be non-NULL if we never finished + * initializing the wiphy and thus never went through the + * unregister path - e.g. in failure scenarios. Thus, it + * cannot have been visible to anyone if non-NULL, so we + * can just free it here. + */ + kfree(rcu_dereference_raw(rdev->wiphy.regd)); + kfree(rdev); } From 05075fe7455a210769b266e62a0040ddc98b2739 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Sep 2021 13:44:03 +0200 Subject: [PATCH 018/440] nl80211: don't kfree() ERR_PTR() value When parse_acl_data() fails, we get an ERR_PTR() value and then "goto out;", in this case we'd attempt to kfree() it. Fix that. Fixes: 9e263e193af7 ("nl80211: don't put struct cfg80211_ap_settings on stack") Link: https://lore.kernel.org/r/20210927134402.86c5ae06c952.Ic51e234d998b9045665e5ff8b6db7e29f25d70c0@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7c03f6145f0..0f728de36f33 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5507,6 +5507,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params->acl)) { err = PTR_ERR(params->acl); + params->acl = NULL; goto out; } } From dc1e3cb8da8b414b37208b2fb6755fef8122504b Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 15 Sep 2021 19:54:34 -0700 Subject: [PATCH 019/440] nl80211: MBSSID and EMA support in AP mode Add new attributes to configure support for multiple BSSID and advanced multi-BSSID advertisements (EMA) in AP mode. - NL80211_ATTR_MBSSID_CONFIG used for per interface configuration. - NL80211_ATTR_MBSSID_ELEMS used to MBSSID elements for beacons. Memory for the elements is allocated dynamically. This change frees the memory in existing functions which call nl80211_parse_beacon(), a comment is added to indicate the new references to do the same. Signed-off-by: John Crispin Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20210916025437.29138-2-alokad@codeaurora.org [don't leave ERR_PTR hanging around] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 44 +++++++++ include/uapi/linux/nl80211.h | 76 ++++++++++++++- net/wireless/nl80211.c | 178 ++++++++++++++++++++++++++++++++++- 3 files changed, 293 insertions(+), 5 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 125f563d66a1..e9e313aa991f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1056,6 +1056,36 @@ struct cfg80211_crypto_settings { enum nl80211_sae_pwe_mechanism sae_pwe; }; +/** + * struct cfg80211_mbssid_config - AP settings for multi bssid + * + * @tx_wdev: pointer to the transmitted interface in the MBSSID set + * @index: index of this AP in the multi bssid group. + * @ema: set to true if the beacons should be sent out in EMA mode. + */ +struct cfg80211_mbssid_config { + struct wireless_dev *tx_wdev; + u8 index; + bool ema; +}; + +/** + * struct cfg80211_mbssid_elems - Multiple BSSID elements + * + * @cnt: Number of elements in array %elems. + * + * @elem: Array of multiple BSSID element(s) to be added into Beacon frames. + * @elem.data: Data for multiple BSSID elements. + * @elem.len: Length of data. + */ +struct cfg80211_mbssid_elems { + u8 cnt; + struct { + const u8 *data; + size_t len; + } elem[]; +}; + /** * struct cfg80211_beacon_data - beacon data * @head: head portion of beacon (before TIM IE) @@ -1074,6 +1104,7 @@ struct cfg80211_crypto_settings { * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) + * @mbssid_ies: multiple BSSID elements * @ftm_responder: enable FTM responder functionality; -1 for no change * (which also implies no change in LCI/civic location data) * @lci: Measurement Report element content, starting with Measurement Token @@ -1091,6 +1122,7 @@ struct cfg80211_beacon_data { const u8 *probe_resp; const u8 *lci; const u8 *civicloc; + struct cfg80211_mbssid_elems *mbssid_ies; s8 ftm_responder; size_t head_len, tail_len; @@ -1205,6 +1237,7 @@ enum cfg80211_ap_settings_flags { * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + * @mbssid_config: AP settings for multiple bssid */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1237,6 +1270,7 @@ struct cfg80211_ap_settings { struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; + struct cfg80211_mbssid_config mbssid_config; }; /** @@ -5003,6 +5037,13 @@ struct wiphy_iftype_akm_suites { * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes * @sar_capa: SAR control capabilities * @rfkill: a pointer to the rfkill structure + * + * @mbssid_max_interfaces: maximum number of interfaces supported by the driver + * in a multiple BSSID set. This field must be set to a non-zero value + * by the driver to advertise MBSSID support. + * @mbssid_max_ema_profile_periodicity: maximum profile periodicity supported by + * the driver. Setting this field to a non-zero value indicates that the + * driver supports enhanced multi-BSSID advertisements (EMA AP). */ struct wiphy { struct mutex mtx; @@ -5147,6 +5188,9 @@ struct wiphy { struct rfkill *rfkill; + u8 mbssid_max_interfaces; + u8 ema_max_profile_periodicity; + char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e89bbf856228..eda608b1eb09 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -360,7 +360,10 @@ * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from * userspace to request deletion of a virtual interface, then requires - * attribute %NL80211_ATTR_IFINDEX. + * attribute %NL80211_ATTR_IFINDEX. If multiple BSSID advertisements are + * enabled using %NL80211_ATTR_MBSSID_CONFIG, %NL80211_ATTR_MBSSID_ELEMS, + * and if this command is used for the transmitting interface, then all + * the non-transmitting interfaces are deleted as well. * * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. @@ -2624,6 +2627,18 @@ enum nl80211_commands { * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE * information for the time while performing a color switch. * + * @NL80211_ATTR_MBSSID_CONFIG: Nested attribute for multiple BSSID + * advertisements (MBSSID) parameters in AP mode. + * Kernel uses this attribute to indicate the driver's support for MBSSID + * and enhanced multi-BSSID advertisements (EMA AP) to the userspace. + * Userspace should use this attribute to configure per interface MBSSID + * parameters. + * See &enum nl80211_mbssid_config_attributes for details. + * + * @NL80211_ATTR_MBSSID_ELEMS: Nested parameter to pass multiple BSSID elements. + * Mandatory parameter for the transmitting interface to enable MBSSID. + * Optional for the non-transmitting interfaces. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3127,6 +3142,9 @@ enum nl80211_attrs { NL80211_ATTR_COLOR_CHANGE_COLOR, NL80211_ATTR_COLOR_CHANGE_ELEMS, + NL80211_ATTR_MBSSID_CONFIG, + NL80211_ATTR_MBSSID_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -7386,4 +7404,60 @@ enum nl80211_sar_specs_attrs { NL80211_SAR_ATTR_SPECS_MAX = __NL80211_SAR_ATTR_SPECS_LAST - 1, }; +/** + * enum nl80211_mbssid_config_attributes - multiple BSSID (MBSSID) and enhanced + * multi-BSSID advertisements (EMA) in AP mode. + * Kernel uses some of these attributes to advertise driver's support for + * MBSSID and EMA. + * Remaining attributes should be used by the userspace to configure the + * features. + * + * @__NL80211_MBSSID_CONFIG_ATTR_INVALID: Invalid + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES: Used by the kernel to advertise + * the maximum number of MBSSID interfaces supported by the driver. + * Driver should indicate MBSSID support by setting + * wiphy->mbssid_max_interfaces to a value more than or equal to 2. + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel + * to advertise the maximum profile periodicity supported by the driver + * if EMA is enabled. Driver should indicate EMA support to the userspace + * by setting wiphy->mbssid_max_ema_profile_periodicity to + * a non-zero value. + * + * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of + * this BSS (u8) in the multiple BSSID set. + * Value must be set to 0 for the transmitting interface and non-zero for + * all non-transmitting interfaces. The userspace will be responsible + * for using unique indices for the interfaces. + * Range: 0 to wiphy->mbssid_max_interfaces-1. + * + * @NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX: Mandatory parameter for + * a non-transmitted profile which provides the interface index (u32) of + * the transmitted profile. The value must match one of the interface + * indices advertised by the kernel. Optional if the interface being set up + * is the transmitting one, however, if provided then the value must match + * the interface index of the same. + * + * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature. + * Setting this flag is permitted only if the driver advertises EMA support + * by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero. + * + * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal + * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute + */ +enum nl80211_mbssid_config_attributes { + __NL80211_MBSSID_CONFIG_ATTR_INVALID, + + NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, + NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, + NL80211_MBSSID_CONFIG_ATTR_INDEX, + NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX, + NL80211_MBSSID_CONFIG_ATTR_EMA, + + /* keep last */ + __NL80211_MBSSID_CONFIG_ATTR_LAST, + NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0f728de36f33..3f37e4d5c5d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -437,6 +437,16 @@ sar_policy[NL80211_SAR_ATTR_MAX + 1] = { [NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy), }; +static const struct nla_policy +nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = { + [NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES] = NLA_POLICY_MIN(NLA_U8, 2), + [NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY] = + NLA_POLICY_MIN(NLA_U8, 1), + [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 }, + [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 }, + [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG }, +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -763,6 +773,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), + [NL80211_ATTR_MBSSID_CONFIG] = + NLA_POLICY_NESTED(nl80211_mbssid_config_policy), + [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -2207,6 +2220,35 @@ fail: return -ENOBUFS; } +static int nl80211_put_mbssid_support(struct wiphy *wiphy, struct sk_buff *msg) +{ + struct nlattr *config; + + if (!wiphy->mbssid_max_interfaces) + return 0; + + config = nla_nest_start(msg, NL80211_ATTR_MBSSID_CONFIG); + if (!config) + return -ENOBUFS; + + if (nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, + wiphy->mbssid_max_interfaces)) + goto fail; + + if (wiphy->ema_max_profile_periodicity && + nla_put_u8(msg, + NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, + wiphy->ema_max_profile_periodicity)) + goto fail; + + nla_nest_end(msg, config); + return 0; + +fail: + nla_nest_cancel(msg, config); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2792,6 +2834,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_put_sar_specs(rdev, msg)) goto nla_put_failure; + if (nl80211_put_mbssid_support(&rdev->wiphy, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -4981,6 +5026,96 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, return 0; } +static int nl80211_parse_mbssid_config(struct wiphy *wiphy, + struct net_device *dev, + struct nlattr *attrs, + struct cfg80211_mbssid_config *config, + u8 num_elems) +{ + struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1]; + + if (!wiphy->mbssid_max_interfaces) + return -EOPNOTSUPP; + + if (nla_parse_nested(tb, NL80211_MBSSID_CONFIG_ATTR_MAX, attrs, NULL, + NULL) || + !tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]) + return -EINVAL; + + config->ema = nla_get_flag(tb[NL80211_MBSSID_CONFIG_ATTR_EMA]); + if (config->ema) { + if (!wiphy->ema_max_profile_periodicity) + return -EOPNOTSUPP; + + if (num_elems > wiphy->ema_max_profile_periodicity) + return -EINVAL; + } + + config->index = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]); + if (config->index >= wiphy->mbssid_max_interfaces || + (!config->index && !num_elems)) + return -EINVAL; + + if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) { + u32 tx_ifindex = + nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]); + + if ((!config->index && tx_ifindex != dev->ifindex) || + (config->index && tx_ifindex == dev->ifindex)) + return -EINVAL; + + if (tx_ifindex != dev->ifindex) { + struct net_device *tx_netdev = + dev_get_by_index(wiphy_net(wiphy), tx_ifindex); + + if (!tx_netdev || !tx_netdev->ieee80211_ptr || + tx_netdev->ieee80211_ptr->wiphy != wiphy || + tx_netdev->ieee80211_ptr->iftype != + NL80211_IFTYPE_AP) { + dev_put(tx_netdev); + return -EINVAL; + } + + config->tx_wdev = tx_netdev->ieee80211_ptr; + } else { + config->tx_wdev = dev->ieee80211_ptr; + } + } else if (!config->index) { + config->tx_wdev = dev->ieee80211_ptr; + } else { + return -EINVAL; + } + + return 0; +} + +static struct cfg80211_mbssid_elems * +nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) +{ + struct nlattr *nl_elems; + struct cfg80211_mbssid_elems *elems; + int rem_elems; + u8 i = 0, num_elems = 0; + + if (!wiphy->mbssid_max_interfaces) + return ERR_PTR(-EINVAL); + + nla_for_each_nested(nl_elems, attrs, rem_elems) + num_elems++; + + elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); + if (!elems) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(nl_elems, attrs, rem_elems) { + elems->elem[i].data = nla_data(nl_elems); + elems->elem[i].len = nla_len(nl_elems); + i++; + } + elems->cnt = num_elems; + return elems; +} + static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) @@ -5061,6 +5196,17 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, bcn->ftm_responder = -1; } + if (attrs[NL80211_ATTR_MBSSID_ELEMS]) { + struct cfg80211_mbssid_elems *mbssid = + nl80211_parse_mbssid_elems(&rdev->wiphy, + attrs[NL80211_ATTR_MBSSID_ELEMS]); + + if (IS_ERR(mbssid)) + return PTR_ERR(mbssid); + + bcn->mbssid_ies = mbssid; + } + return 0; } @@ -5547,6 +5693,17 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } + if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { + err = nl80211_parse_mbssid_config(&rdev->wiphy, dev, + info->attrs[NL80211_ATTR_MBSSID_CONFIG], + ¶ms->mbssid_config, + params->beacon.mbssid_ies ? + params->beacon.mbssid_ies->cnt : + 0); + if (err) + goto out; + } + nl80211_calculate_ap_params(params); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) @@ -5568,6 +5725,11 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) out: kfree(params->acl); + kfree(params->beacon.mbssid_ies); + if (params->mbssid_config.tx_wdev && + params->mbssid_config.tx_wdev->netdev && + params->mbssid_config.tx_wdev->netdev != dev) + dev_put(params->mbssid_config.tx_wdev->netdev); kfree(params); return err; @@ -5593,12 +5755,14 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_beacon(rdev, info->attrs, ¶ms); if (err) - return err; + goto out; wdev_lock(wdev); err = rdev_change_beacon(rdev, dev, ¶ms); wdev_unlock(wdev); +out: + kfree(params.mbssid_ies); return err; } @@ -9275,12 +9439,14 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon_after); if (err) - return err; + goto free; csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs), GFP_KERNEL); - if (!csa_attrs) - return -ENOMEM; + if (!csa_attrs) { + err = -ENOMEM; + goto free; + } err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], @@ -9398,6 +9564,8 @@ skip_beacons: wdev_unlock(wdev); free: + kfree(params.beacon_after.mbssid_ies); + kfree(params.beacon_csa.mbssid_ies); kfree(csa_attrs); return err; } @@ -14933,6 +15101,8 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) wdev_unlock(wdev); out: + kfree(params.beacon_next.mbssid_ies); + kfree(params.beacon_color_change.mbssid_ies); kfree(tb); return err; } From 171964252189d8ad5672c730f2197aa73092db6e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 15 Sep 2021 19:54:35 -0700 Subject: [PATCH 020/440] mac80211: MBSSID support in interface handling Configure multiple BSSID and enhanced multi-BSSID advertisement (EMA) parameters in mac80211 for AP mode. For each interface, 'mbssid_tx_vif' points to the transmitting interface of the MBSSID set. The pointer is set to NULL if MBSSID is disabled. Function ieee80211_stop() is modified to always bring down all the non-transmitting interfaces first and the transmitting interface last. Signed-off-by: John Crispin Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20210916025437.29138-3-alokad@codeaurora.org [slightly change logic to be more obvious] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/cfg.c | 38 ++++++++++++++++++++++++++++++++++++++ net/mac80211/iface.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 16a965262a4f..13cad5e9e6c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1723,6 +1723,7 @@ enum ieee80211_offload_flags { * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. * @color_change_color: the bss color that will be used after the change. + * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1754,6 +1755,8 @@ struct ieee80211_vif { bool color_change_active; u8 color_change_color; + struct ieee80211_vif *mbssid_tx_vif; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d69b31c20fe2..e2b791c37591 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -111,6 +111,36 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, return 0; } +static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, + struct cfg80211_mbssid_config params) +{ + struct ieee80211_sub_if_data *tx_sdata; + + sdata->vif.mbssid_tx_vif = NULL; + sdata->vif.bss_conf.bssid_index = 0; + sdata->vif.bss_conf.nontransmitted = false; + sdata->vif.bss_conf.ema_ap = false; + + if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) + return -EINVAL; + + tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); + if (!tx_sdata) + return -EINVAL; + + if (tx_sdata == sdata) { + sdata->vif.mbssid_tx_vif = &sdata->vif; + } else { + sdata->vif.mbssid_tx_vif = &tx_sdata->vif; + sdata->vif.bss_conf.nontransmitted = true; + sdata->vif.bss_conf.bssid_index = params.index; + } + if (params.ema) + sdata->vif.bss_conf.ema_ap = true; + + return 0; +} + static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, @@ -1105,6 +1135,14 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, changed |= BSS_CHANGED_HE_BSS_COLOR; } + if (sdata->vif.type == NL80211_IFTYPE_AP && + params->mbssid_config.tx_wdev) { + err = ieee80211_set_ap_mbssid_options(sdata, + params->mbssid_config); + if (err) + return err; + } + mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 62c95597704b..691b983b762e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -632,17 +632,46 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_add_virtual_monitor(local); } +static void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *tx_sdata, *non_tx_sdata, *tmp_sdata; + struct ieee80211_vif *tx_vif = sdata->vif.mbssid_tx_vif; + + if (!tx_vif) + return; + + tx_sdata = vif_to_sdata(tx_vif); + sdata->vif.mbssid_tx_vif = NULL; + + list_for_each_entry_safe(non_tx_sdata, tmp_sdata, + &tx_sdata->local->interfaces, list) { + if (non_tx_sdata != sdata && non_tx_sdata != tx_sdata && + non_tx_sdata->vif.mbssid_tx_vif == tx_vif && + ieee80211_sdata_running(non_tx_sdata)) { + non_tx_sdata->vif.mbssid_tx_vif = NULL; + dev_close(non_tx_sdata->wdev.netdev); + } + } + + if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) { + tx_sdata->vif.mbssid_tx_vif = NULL; + dev_close(tx_sdata->wdev.netdev); + } +} + static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* close all dependent VLAN interfaces before locking wiphy */ + /* close dependent VLAN and MBSSID interfaces before locking wiphy */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); + + ieee80211_stop_mbssid(sdata); } wiphy_lock(sdata->local->hw.wiphy); From 2232642ec3fb4aad6ae4da1e109f55a0e7f2d204 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Fri, 20 Aug 2021 13:37:52 +0800 Subject: [PATCH 021/440] ipvs: add sysctl_run_estimation to support disable estimation estimation_timer will iterate the est_list to do estimation for each ipvs stats. When there are lots of services, the list can be very large. We found that estimation_timer() run for more then 200ms on a machine with 104 CPU and 50K services. yunhong-cgl jiang report the same phenomenon before: https://www.spinics.net/lists/lvs-devel/msg05426.html In some cases(for example a large K8S cluster with many ipvs services), ipvs estimation may not be needed. So adding a sysctl blob to allow users to disable this completely. Default is: 1 (enable) Cc: yunhong-cgl jiang Signed-off-by: Dust Li Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/ipvs-sysctl.rst | 11 +++++++++++ include/net/ip_vs.h | 11 +++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ net/netfilter/ipvs/ip_vs_est.c | 5 +++++ 4 files changed, 35 insertions(+) diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index 2afccc63856e..95ef56d62077 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -300,3 +300,14 @@ sync_version - INTEGER Kernels with this sync_version entry are able to receive messages of both version 1 and version 2 of the synchronisation protocol. + +run_estimation - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + If disabled, the estimation will be stop, and you can't see + any update on speed estimation data. + + You can always re-enable estimation by setting this value to 1. + But be careful, the first estimation after re-enable is not + accurate. diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7cb5a1aace40..ff1804a0c469 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -931,6 +931,7 @@ struct netns_ipvs { int sysctl_conn_reuse_mode; int sysctl_schedule_icmp; int sysctl_ignore_tunneled; + int sysctl_run_estimation; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return ipvs->sysctl_cache_bypass; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_run_estimation; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return 0; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return 1; +} + #endif /* IPVS core functions diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a06..cbea5a68afb5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2017,6 +2017,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "run_estimation", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4090,6 +4096,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; + ipvs->sysctl_run_estimation = 1; + tbl[idx++].data = &ipvs->sysctl_run_estimation; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 05b8112ffb37..9a1a7af6a186 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t) u64 rate; struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); + if (!sysctl_run_estimation(ipvs)) + goto skip; + spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t) spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); + +skip: mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } From 7b1394892de8d95748d05e3ee41e85edb4abbfa1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Sep 2021 22:40:26 +0200 Subject: [PATCH 022/440] netfilter: nft_dynset: relax superfluous check on set updates Relax this condition to make add and update commands idempotent for sets with no timeout. The eval function already checks if the set element timeout is available and updates it if the update command is used. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 6ba3256fa844..87f3af4645d9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -198,17 +198,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); - switch (priv->op) { - case NFT_DYNSET_OP_ADD: - case NFT_DYNSET_OP_DELETE: - break; - case NFT_DYNSET_OP_UPDATE: - if (!(set->flags & NFT_SET_TIMEOUT)) - return -EOPNOTSUPP; - break; - default: + if (priv->op > NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; - } timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { From da8f606e15c773c7e6950edab3d12105dc40615d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Oct 2021 11:18:45 -0700 Subject: [PATCH 023/440] ethernet: un-export nvmem_get_mac_address() nvmem_get_mac_address() is only called from of_net.c we don't need the export. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethernet/eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d7b8fa10fabb..182de70ac258 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -558,7 +558,6 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf) return 0; } -EXPORT_SYMBOL(nvmem_get_mac_address); static int fwnode_get_mac_addr(struct fwnode_handle *fwnode, const char *name, char *addr) From ba882580f211dbe4fee7f010c9d38dd879db83a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Oct 2021 11:18:46 -0700 Subject: [PATCH 024/440] eth: platform: add a helper for loading netdev->dev_addr Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. There is a handful of drivers which pass netdev->dev_addr as the destination buffer to eth_platform_get_mac_address(). Add a helper which takes a dev pointer instead, so it can call an appropriate helper. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 + net/ethernet/eth.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index e75116f48cd1..3cf546d2ffd1 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,7 @@ struct device; struct fwnode_handle; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); +int platform_get_ethdev_address(struct device *dev, struct net_device *netdev); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); int device_get_mac_address(struct device *dev, char *addr); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 182de70ac258..c7d9e08107cb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -523,6 +523,26 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) } EXPORT_SYMBOL(eth_platform_get_mac_address); +/** + * platform_get_ethdev_address - Set netdev's MAC address from a given device + * @dev: Pointer to the device + * @netdev: Pointer to netdev to write the address to + * + * Wrapper around eth_platform_get_mac_address() which writes the address + * directly to netdev->dev_addr. + */ +int platform_get_ethdev_address(struct device *dev, struct net_device *netdev) +{ + u8 addr[ETH_ALEN] __aligned(2); + int ret; + + ret = eth_platform_get_mac_address(dev, addr); + if (!ret) + eth_hw_addr_set(netdev, addr); + return ret; +} +EXPORT_SYMBOL(platform_get_ethdev_address); + /** * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named * 'mac-address' associated with given device. From 4d04cdc5ee4961022b1bdc76b76132787b8c9e5b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Oct 2021 11:18:47 -0700 Subject: [PATCH 025/440] ethernet: use platform_get_ethdev_address() Use the new platform_get_ethdev_address() helper for the cases where dev->dev_addr is passed in directly as the destination. @@ expression dev, net; @@ - eth_platform_get_mac_address(dev, net->dev_addr) + platform_get_ethdev_address(dev, net) Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/actions/owl-emac.c | 2 +- drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 +- drivers/net/usb/smsc75xx.c | 3 +-- drivers/net/usb/smsc95xx.c | 3 +-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index 2c25ff3623cd..dce93acd1644 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1385,7 +1385,7 @@ static void owl_emac_get_mac_addr(struct net_device *netdev) struct device *dev = netdev->dev.parent; int ret; - ret = eth_platform_get_mac_address(dev, netdev->dev_addr); + ret = platform_get_ethdev_address(dev, netdev); if (!ret && is_valid_ether_addr(netdev->dev_addr)) return; diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 1d5dd2015453..e2ebfd8115a0 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1544,7 +1544,7 @@ static int mtk_star_probe(struct platform_device *pdev) if (ret) return ret; - ret = eth_platform_get_mac_address(dev, ndev->dev_addr); + ret = platform_get_ethdev_address(dev, ndev); if (ret || !is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 76f7af161313..3b6987bb4fbe 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -758,8 +758,7 @@ static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc75xx_init_mac_address(struct usbnet *dev) { /* maybe the boot loader passed the MAC address in devicetree */ - if (!eth_platform_get_mac_address(&dev->udev->dev, - dev->net->dev_addr)) { + if (!platform_get_ethdev_address(&dev->udev->dev, dev->net)) { if (is_valid_ether_addr(dev->net->dev_addr)) { /* device tree values are valid so use them */ netif_dbg(dev, ifup, dev->net, "MAC address read from the device tree\n"); diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 26b1bd8e845b..21a42a6527dc 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -756,8 +756,7 @@ static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc95xx_init_mac_address(struct usbnet *dev) { /* maybe the boot loader passed the MAC address in devicetree */ - if (!eth_platform_get_mac_address(&dev->udev->dev, - dev->net->dev_addr)) { + if (!platform_get_ethdev_address(&dev->udev->dev, dev->net)) { if (is_valid_ether_addr(dev->net->dev_addr)) { /* device tree values are valid so use them */ netif_dbg(dev, ifup, dev->net, "MAC address read from the device tree\n"); From 339e75f6b9a076b3a9c01ab4bdd4f58975098445 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 8 Oct 2021 14:21:17 +0800 Subject: [PATCH 026/440] net: dsa: rtl8366rb: remove unneeded semicolon Eliminate the following coccicheck warning: ./drivers/net/dsa/rtl8366rb.c:1348:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: David S. Miller --- drivers/net/dsa/rtl8366rb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index d2370cda4be0..03deacd83e61 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -1345,7 +1345,7 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) default: dev_err(smi->dev, "unknown bridge state requested\n"); return; - }; + } /* Set the same status for the port on all the FIDs */ for (i = 0; i < RTL8366RB_NUM_FIDS; i++) { From d5ac07dfbd2b7f05d8ff43924dee2f09675dacb3 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Thu, 7 Oct 2021 06:04:13 -0600 Subject: [PATCH 027/440] qed: Initialize debug string array Coverity complains of an uninitialized variable. CID 120847 (#1 of 1): Uninitialized scalar variable (UNINIT) 3. uninit_use_in_call: Using uninitialized value *sw_platform_str when calling qed_dump_str_param. [show details] 1344 offset += qed_dump_str_param(dump_buf + offset, 1345 dump, "sw-platform", sw_platform_str); Fix this by removing dead code that references sw_platform_str. Fixes: 6c95dd8f0aa1d ("qed: Update debug related changes") Cc: Ariel Elior Cc: GR-everest-linux-l2@marvell.com Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Shai Malin Cc: Omkar Kulkarni Cc: Prabhakar Kushwaha Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org (open list) Signed-off-by: Tim Gardner Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 6d693ee380f1..f6198b9a1b02 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -1315,7 +1315,6 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, u8 num_specific_global_params) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - char sw_platform_str[MAX_SW_PLTAFORM_STR_SIZE]; u32 offset = 0; u8 num_params; @@ -1341,8 +1340,6 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, dump, "platform", s_hw_type_defs[dev_data->hw_type].name); - offset += qed_dump_str_param(dump_buf + offset, - dump, "sw-platform", sw_platform_str); offset += qed_dump_num_param(dump_buf + offset, dump, "pci-func", p_hwfn->abs_pf_id); offset += qed_dump_num_param(dump_buf + offset, From 454d3e1ae057a1e09a15905b06b860f60d6c14d0 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 7 Oct 2021 15:05:02 +0200 Subject: [PATCH 028/440] net/sched: sch_ets: properly init all active DRR list handles leaf classes of ETS qdiscs are served in strict priority or deficit round robin (DRR), depending on the value of 'nstrict'. Since this value can be changed while traffic is running, we need to be sure that the active list of DRR classes can be updated at any time, so: 1) call INIT_LIST_HEAD(&alist) on all leaf classes in .init(), before the first packet hits any of them. 2) ensure that 'alist' is not overwritten with zeros when a leaf class is no more strict priority nor DRR (i.e. array elements beyond 'nbands'). Link: https://lore.kernel.org/netdev/YS%2FoZ+f0Nr8eQkzH@dcaratti.users.ipa.redhat.com Suggested-by: Cong Wang Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_ets.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 1f857ffd1ac2..ed86b7021f6d 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -661,7 +661,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->nbands = nbands; for (i = nstrict; i < q->nstrict; i++) { - INIT_LIST_HEAD(&q->classes[i].alist); if (q->classes[i].qdisc->q.qlen) { list_add_tail(&q->classes[i].alist, &q->active); q->classes[i].deficit = quanta[i]; @@ -687,7 +686,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, ets_offload_change(sch); for (i = q->nbands; i < oldbands; i++) { qdisc_put(q->classes[i].qdisc); - memset(&q->classes[i], 0, sizeof(q->classes[i])); + q->classes[i].qdisc = NULL; + q->classes[i].quantum = 0; + q->classes[i].deficit = 0; + memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats)); + memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); } return 0; } @@ -696,7 +699,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct ets_sched *q = qdisc_priv(sch); - int err; + int err, i; if (!opt) return -EINVAL; @@ -706,6 +709,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, return err; INIT_LIST_HEAD(&q->active); + for (i = 0; i < TCQ_ETS_MAX_BANDS; i++) + INIT_LIST_HEAD(&q->classes[i].alist); + return ets_qdisc_change(sch, opt, extack); } From 94114d90037fe730bab5ea76d388bc294034fa39 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 7 Oct 2021 14:23:27 +0100 Subject: [PATCH 029/440] net: mdio: ensure the type of mdio devices match mdio drivers On the MDIO bus, we have PHYLIB devices and drivers, and we have non- PHYLIB devices and drivers. PHYLIB devices are MDIO devices that are wrapped with a struct phy_device. Trying to bind a MDIO device with a PHYLIB driver results in out-of- bounds accesses as we attempt to access struct phy_device members. So, let's prevent this by ensuring that the type of the MDIO device (indicated by the MDIO_DEVICE_FLAG_PHY flag) matches the type of the MDIO driver (indicated by the MDIO_DEVICE_IS_PHY flag.) Link: https://lore.kernel.org/r/2b1dc053-8c9a-e3e4-b450-eecdfca3fe16@gmail.com Tested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index daf8356b54f3..c204067f1890 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -971,8 +971,14 @@ EXPORT_SYMBOL_GPL(mdiobus_modify_changed); */ static int mdio_bus_match(struct device *dev, struct device_driver *drv) { + struct mdio_driver *mdiodrv = to_mdio_driver(drv); struct mdio_device *mdio = to_mdio_device(dev); + /* Both the driver and device must type-match */ + if (!(mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) != + !(mdio->flags & MDIO_DEVICE_FLAG_PHY)) + return 0; + if (of_driver_match_device(dev, drv)) return 1; From 2b12d51c4fa8dab6a879681cbf52f30e667c928c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 7 Oct 2021 14:23:32 +0100 Subject: [PATCH 030/440] net: phylib: ensure phy device drivers do not match by DT PHYLIB device drivers must match by either numerical PHY ID or by their .match_phy_device method. Matching by DT is not permitted. Link: https://lore.kernel.org/r/2b1dc053-8c9a-e3e4-b450-eecdfca3fe16@gmail.com Tested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ba5ad86ec826..ba295b8ad8fe 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3146,6 +3146,16 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) return -EINVAL; } + /* PHYLIB device drivers must not match using a DT compatible table + * as this bypasses our checks that the mdiodev that is being matched + * is backed by a struct phy_device. If such a case happens, we will + * make out-of-bounds accesses and lockup in phydev->lock. + */ + if (WARN(new_driver->mdiodrv.driver.of_match_table, + "%s: driver must not provide a DT match table\n", + new_driver->name)) + return -EINVAL; + new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; new_driver->mdiodrv.driver.name = new_driver->name; new_driver->mdiodrv.driver.bus = &mdio_bus_type; From 146e5e733310379f51924111068f08a3af0db830 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 7 Oct 2021 16:00:51 +0200 Subject: [PATCH 031/440] net-sysfs: try not to restart the syscall if it will fail eventually Due to deadlocks in the networking subsystem spotted 12 years ago[1], a workaround was put in place[2] to avoid taking the rtnl lock when it was not available and restarting the syscall (back to VFS, letting userspace spin). The following construction is found a lot in the net sysfs and sysctl code: if (!rtnl_trylock()) return restart_syscall(); This can be problematic when multiple userspace threads use such interfaces in a short period, making them to spin a lot. This happens for example when adding and moving virtual interfaces: userspace programs listening on events, such as systemd-udevd and NetworkManager, do trigger actions reading files in sysfs. It gets worse when a lot of virtual interfaces are created concurrently, say when creating containers at boot time. Returning early without hitting the above pattern when the syscall will fail eventually does make things better. While it is not a fix for the issue, it does ease things. [1] https://lore.kernel.org/netdev/49A4D5D5.5090602@trash.net/ https://lore.kernel.org/netdev/m14oyhis31.fsf@fess.ebiederm.org/ and https://lore.kernel.org/netdev/20090226084924.16cb3e08@nehalam/ [2] Rightfully, those deadlocks are *hard* to solve. Signed-off-by: Antoine Tenart Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index ae001c2ca2af..d6e4e0b43beb 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier) static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_carrier; this helps returning early + * without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_carrier) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_carrier); } @@ -196,6 +204,12 @@ static ssize_t speed_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_proto_down; this helps returning + * early without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_proto_down) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); @@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The check is also done in dev_get_phys_port_id; this helps returning + * early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_id) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_name && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. This works + * because recurse is false when calling dev_get_port_parent_id. + */ + if (!netdev->netdev_ops->ndo_get_port_parent_id && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -1226,6 +1275,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* The check is also done later; this helps returning early without + * hitting the trylock/restart below. + */ + if (!dev->netdev_ops->ndo_set_tx_maxrate) + return -EOPNOTSUPP; + err = kstrtou32(buf, 10, &rate); if (err < 0) return err; From 16bdce2ada5a4c3c91b7c4e81780d2de50bd6ab5 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 8 Oct 2021 17:39:32 +0300 Subject: [PATCH 032/440] ath11k: fix m68k and xtensa build failure in ath11k_peer_assoc_h_smps() Stephen reported that ath11k was failing to build on m68k and xtensa: In file included from :0:0: In function 'ath11k_peer_assoc_h_smps', inlined from 'ath11k_peer_assoc_prepare' at drivers/net/wireless/ath/ath11k/mac.c:2362:2: include/linux/compiler_types.h:317:38: error: call to '__compiletime_assert_650' declared with attribute error: FIELD_GET: type of reg too small for mask _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ include/linux/compiler_types.h:298:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ include/linux/compiler_types.h:317:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^ include/linux/bitfield.h:52:3: note: in expansion of macro 'BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ ^ include/linux/bitfield.h:108:3: note: in expansion of macro '__BF_FIELD_CHECK' __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/net/wireless/ath/ath11k/mac.c:2079:10: note: in expansion of macro 'FIELD_GET' smps = FIELD_GET(IEEE80211_HE_6GHZ_CAP_SM_PS, Fix the issue by using le16_get_bits() to specify the size explicitly. Fixes: 6f4d70308e5e ("ath11k: support SMPS configuration for 6 GHz") Reported-by: Stephen Rothwell Tested-by: Geert Uytterhoeven Signed-off-by: Kalle Valo Signed-off-by: David S. Miller --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 31f0cfba5bf5..da850f4b2919 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2076,8 +2076,8 @@ static void ath11k_peer_assoc_h_smps(struct ieee80211_sta *sta, smps = ht_cap->cap & IEEE80211_HT_CAP_SM_PS; smps >>= IEEE80211_HT_CAP_SM_PS_SHIFT; } else { - smps = FIELD_GET(IEEE80211_HE_6GHZ_CAP_SM_PS, - le16_to_cpu(sta->he_6ghz_capa.capa)); + smps = le16_get_bits(sta->he_6ghz_capa.capa, + IEEE80211_HE_6GHZ_CAP_SM_PS); } switch (smps) { From 685c3f2fba298458865ade3bb310c5ddddef87f0 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Fri, 8 Oct 2021 11:00:52 +0100 Subject: [PATCH 033/440] vsock: Refactor vsock_*_getsockopt to resemble sock_getsockopt In preparation for sharing the implementation of sock_get_timeout. Signed-off-by: Richard Palethorpe Cc: Richard Palethorpe Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 65 +++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index e2c0cfb334d2..9d0de37b9ec0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1648,68 +1648,59 @@ static int vsock_connectible_getsockopt(struct socket *sock, char __user *optval, int __user *optlen) { - int err; + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + union { + u64 val64; + struct __kernel_old_timeval tm; + } v; + + int lv = sizeof(v.val64); int len; - struct sock *sk; - struct vsock_sock *vsk; - u64 val; if (level != AF_VSOCK) return -ENOPROTOOPT; - err = get_user(len, optlen); - if (err != 0) - return err; + if (get_user(len, optlen)) + return -EFAULT; -#define COPY_OUT(_v) \ - do { \ - if (len < sizeof(_v)) \ - return -EINVAL; \ - \ - len = sizeof(_v); \ - if (copy_to_user(optval, &_v, len) != 0) \ - return -EFAULT; \ - \ - } while (0) - - err = 0; - sk = sock->sk; - vsk = vsock_sk(sk); + memset(&v, 0, sizeof(v)); switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: - val = vsk->buffer_size; - COPY_OUT(val); + v.val64 = vsk->buffer_size; break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: - val = vsk->buffer_max_size; - COPY_OUT(val); + v.val64 = vsk->buffer_max_size; break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: - val = vsk->buffer_min_size; - COPY_OUT(val); + v.val64 = vsk->buffer_min_size; break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - tv.tv_sec = vsk->connect_timeout / HZ; - tv.tv_usec = + case SO_VM_SOCKETS_CONNECT_TIMEOUT: + lv = sizeof(v.tm); + v.tm.tv_sec = vsk->connect_timeout / HZ; + v.tm.tv_usec = (vsk->connect_timeout - - tv.tv_sec * HZ) * (1000000 / HZ); - COPY_OUT(tv); + v.tm.tv_sec * HZ) * (1000000 / HZ); break; - } + default: return -ENOPROTOOPT; } - err = put_user(len, optlen); - if (err != 0) + if (len < lv) + return -EINVAL; + if (len > lv) + len = lv; + if (copy_to_user(optval, &v, len)) return -EFAULT; -#undef COPY_OUT + if (put_user(len, optlen)) + return -EFAULT; return 0; } From 4c1e34c0dbffb17accdfe16ac97ab432df9024ff Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Fri, 8 Oct 2021 11:00:53 +0100 Subject: [PATCH 034/440] vsock: Enable y2038 safe timeval for timeout Reuse the timeval compat code from core/sock to handle 32-bit and 64-bit timeval structures. Also introduce a new socket option define to allow using y2038 safe timeval under 32-bit. The existing behavior of sock_set_timeout and vsock's timeout setter differ when the time value is out of bounds. vsocks current behavior is retained at the expense of not being able to share the full implementation. This allows the LTP test vsock01 to pass under 32-bit compat mode. Fixes: fe0c72f3db11 ("socket: move compat timeout handling into sock.c") Signed-off-by: Richard Palethorpe Cc: Richard Palethorpe Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++++ include/uapi/linux/vm_sockets.h | 13 +++++++++++- net/core/sock.c | 35 ++++++++++++++++++++++----------- net/vmw_vsock/af_vsock.c | 25 +++++++++++++---------- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index caaec7c55e8e..d08ab55fa4a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2836,4 +2836,8 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +int sock_get_timeout(long timeo, void *optval, bool old_timeval); +int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, + sockptr_t optval, int optlen, bool old_timeval); + #endif /* _SOCK_H */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index 46918a1852d7..c60ca33eac59 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -64,7 +64,7 @@ * timeout for a STREAM socket. */ -#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 +#define SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD 6 /* Option name for using non-blocking send/receive. Use as the option name * for setsockopt(3) or getsockopt(3) to set or get the non-blocking @@ -81,6 +81,17 @@ #define SO_VM_SOCKETS_NONBLOCK_TXRX 7 +#define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8 + +#if !defined(__KERNEL__) +#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) +#define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD +#else +#define SO_VM_SOCKETS_CONNECT_TIMEOUT \ + (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD : SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW) +#endif +#endif + /* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of * sockaddr_vm and indicates the context ID of the current endpoint. */ diff --git a/net/core/sock.c b/net/core/sock.c index beda31764df9..9862eefce21e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -350,7 +350,7 @@ void sk_error_report(struct sock *sk) } EXPORT_SYMBOL(sk_error_report); -static int sock_get_timeout(long timeo, void *optval, bool old_timeval) +int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; @@ -379,12 +379,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) *(struct __kernel_sock_timeval *)optval = tv; return sizeof(tv); } +EXPORT_SYMBOL(sock_get_timeout); -static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, - bool old_timeval) +int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, + sockptr_t optval, int optlen, bool old_timeval) { - struct __kernel_sock_timeval tv; - if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; @@ -393,8 +392,8 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; - tv.tv_sec = tv32.tv_sec; - tv.tv_usec = tv32.tv_usec; + tv->tv_sec = tv32.tv_sec; + tv->tv_usec = tv32.tv_usec; } else if (old_timeval) { struct __kernel_old_timeval old_tv; @@ -402,14 +401,28 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, return -EINVAL; if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; - tv.tv_sec = old_tv.tv_sec; - tv.tv_usec = old_tv.tv_usec; + tv->tv_sec = old_tv.tv_sec; + tv->tv_usec = old_tv.tv_usec; } else { - if (optlen < sizeof(tv)) + if (optlen < sizeof(*tv)) return -EINVAL; - if (copy_from_sockptr(&tv, optval, sizeof(tv))) + if (copy_from_sockptr(tv, optval, sizeof(*tv))) return -EFAULT; } + + return 0; +} +EXPORT_SYMBOL(sock_copy_user_timeval); + +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, + bool old_timeval) +{ + struct __kernel_sock_timeval tv; + int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval); + + if (err) + return err; + if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) return -EDOM; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9d0de37b9ec0..7d851eb3a683 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1614,13 +1614,18 @@ static int vsock_connectible_setsockopt(struct socket *sock, vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - COPY_IN(tv); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: { + struct __kernel_sock_timeval tv; + + err = sock_copy_user_timeval(&tv, optval, optlen, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); + if (err) + break; if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { vsk->connect_timeout = tv.tv_sec * HZ + - DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ)); if (vsk->connect_timeout == 0) vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; @@ -1653,7 +1658,9 @@ static int vsock_connectible_getsockopt(struct socket *sock, union { u64 val64; + struct old_timeval32 tm32; struct __kernel_old_timeval tm; + struct __kernel_sock_timeval stm; } v; int lv = sizeof(v.val64); @@ -1680,12 +1687,10 @@ static int vsock_connectible_getsockopt(struct socket *sock, v.val64 = vsk->buffer_min_size; break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: - lv = sizeof(v.tm); - v.tm.tv_sec = vsk->connect_timeout / HZ; - v.tm.tv_usec = - (vsk->connect_timeout - - v.tm.tv_sec * HZ) * (1000000 / HZ); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: + lv = sock_get_timeout(vsk->connect_timeout, &v, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); break; default: From 45d45e5323a9dab88ab1b50264d1e19242935176 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:34 +0300 Subject: [PATCH 035/440] testing: selftests: forwarding.config.sample: Add tc flag Add TC_FLAG value to tests topology. This flag supposed to be skip_sw/skip_hw which means do not filter by software/hardware. This can be useful for adding tests to forwarding directory, and be able to verify that packets go through the hardware. When the flag is not set or set to 'skip_hw', tests can still be executed with veth pairs. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/forwarding.config.sample | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d2950..10ce3720aa6f 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -39,3 +39,6 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process +# filter by software/hardware +TC_FLAG=skip_hw From c08d227290f618ada4028f0303743263cc026dd9 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:35 +0300 Subject: [PATCH 036/440] testing: selftests: tc_common: Add tc_check_at_least_x_packets() Add function that checks that at least X packets hit the tc rule. There are cases that it is not possible to catch only the interesting packets, so then, it is possible to send many packets and verify that at least this amount of packets hit the rule. This function will be used in the next patch for general tc rule that can be used to test both software and hardware. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/tc_common.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 0e18e8be6e2a..bce8bb8d2b6f 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -16,6 +16,16 @@ tc_check_packets() tc_rule_handle_stats_get "$id" "$handle" > /dev/null } +tc_check_at_least_x_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + + busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null +} + tc_check_packets_hitting() { local id=$1 From 7df29960fa65bb11a5571468056155ca04d8f6df Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:36 +0300 Subject: [PATCH 037/440] selftests: forwarding: Add IPv6 GRE flat tests Add tests that check IPv6-in-IPv6, IPv4-in-IPv6 and MTU change of GRE tunnel. The tests use flat model - overlay and underlay share the same VRF. These tests can be run with TC_FLAG=skip_sw, so then they will verify that packets go through hardware as part of enacp and decap phases. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/ip6gre_flat.sh | 65 +++++ .../net/forwarding/ip6gre_flat_key.sh | 65 +++++ .../net/forwarding/ip6gre_flat_keys.sh | 65 +++++ .../selftests/net/forwarding/ip6gre_lib.sh | 270 ++++++++++++++++++ 4 files changed, 465 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat.sh create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh create mode 100644 tools/testing/selftests/net/forwarding/ip6gre_lib.sh diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh new file mode 100755 index 000000000000..96c97064f2d3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel without key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 + sw2_flat_create $ol2 $ul2 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh new file mode 100755 index 000000000000..ff9fb0db9bd1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 key 233 + sw2_flat_create $ol2 $ul2 key 233 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh new file mode 100755 index 000000000000..12c138785242 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with keys. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 ikey 111 okey 222 + sw2_flat_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh new file mode 100644 index 000000000000..2a5b2126b674 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -0,0 +1,270 @@ +# SPDX-License-Identifier: GPL-2.0 +#!/bin/bash + +# Handles creation and destruction of IP-in-IP or GRE tunnels over the given +# topology. +# +# Flat Model: +# Overlay and underlay share the same VRF. +# SW1 uses default VRF so tunnel has no bound dev. +# SW2 uses non-default VRF tunnel has a bound dev. +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 --. | +# | tos=inherit | | +# | . | +# | .--------------------- | +# | | | +# | v | +# | + $ul1.111 (vlan) | +# | | 2001:db8:10::1/64 | +# | \ | +# | \____________ | +# | | | +# | VRF default + $ul1 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul2 + | +# | ___________| | +# | / | +# | / | +# | + $ul2.111 (vlan) | +# | ^ 2001:db8:10::2/64 | +# | | | +# | | | +# | '----------------------. | +# | + g2a (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 --' | +# | tos=inherit | +# | | +# | + $ol2 | +# | | 203.0.113.2/24 | +# | VRF v$ol2 | 2001:db8:2::2/64 | +# +---------------------|-----------------------+ +# +---------------------|----------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ + +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64 + ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2 + ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2 + simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64 + ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2 + ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 + ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2 + simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64 +} + +sw1_flat_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip link set dev $ol1 up + __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64 + + ip link set dev $ul1 up + vlan_create $ul1 111 "" 2001:db8:10::1/64 + + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit "$@" + ip link set dev g1a up + __addr_add_del g1a add "2001:db8:3::1/128" + + ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add 203.0.113.0/24 dev g1a + ip -6 route add 2001:db8:2::/64 dev g1a +} + +sw1_flat_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del 2001:db8:2::/64 + ip route del 203.0.113.0/24 + ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2 + + __simple_if_fini g1a 2001:db8:3::1/128 + tunnel_destroy g1a + + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + __simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_flat_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 2001:db8:10::2/64 + + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev v$ol2 "$@" + __simple_if_init g2a v$ol2 2001:db8:3::2/128 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_flat_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + + __simple_if_fini g2a 2001:db8:3::2/128 + tunnel_destroy g2a + + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + +test_traffic_ip4ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 203.0.113.1 action pass + + $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \ + -B 203.0.113.1 -t ip -q -d 1msec + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +test_traffic_ip6ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 2001:db8:2::1 action pass + + $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t ip -q -d 1msec + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +topo_mtu_change() +{ + local mtu=$1 + + ip link set mtu $mtu dev $h1 + ip link set mtu $mtu dev $ol1 + ip link set mtu $mtu dev g1a + ip link set mtu $mtu dev $ul1 + ip link set mtu $mtu dev $ul1.111 + ip link set mtu $mtu dev $h2 + ip link set mtu $mtu dev $ol2 + ip link set mtu $mtu dev g2a + ip link set mtu $mtu dev $ul2 + ip link set mtu $mtu dev $ul2.111 +} + +test_mtu_change() +{ + RET=0 + + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_fail $? "ping GRE IPv6 should not pass with packet size 1800" + + RET=0 + + topo_mtu_change 2000 + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_err $? + log_test "ping GRE IPv6, packet size 1800 after MTU change" +} From 4b3d967b5cb99a445a2d55478035308f000cd568 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:37 +0300 Subject: [PATCH 038/440] selftests: forwarding: Add IPv6 GRE hierarchical tests Add tests that check IPv6-in-IPv6, IPv4-in-IPv6 and MTU change of GRE tunnel. The tests use hierarchical model - the tunnel is bound to a device in a different VRF. These tests can be run with TC_FLAG=skip_sw, so then they will verify that packets go through hardware as part of enacp and decap phases. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/ip6gre_hier.sh | 65 +++++++ .../net/forwarding/ip6gre_hier_key.sh | 65 +++++++ .../net/forwarding/ip6gre_hier_keys.sh | 65 +++++++ .../selftests/net/forwarding/ip6gre_lib.sh | 170 +++++++++++++++++- 4 files changed, 364 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier.sh create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh new file mode 100755 index 000000000000..83b55c30a5c3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 + sw2_hierarchical_create $ol2 $ul2 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh new file mode 100755 index 000000000000..256607916d92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 key 22 + sw2_hierarchical_create $ol2 $ul2 key 22 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh new file mode 100755 index 000000000000..ad1bcd6334a8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222 + sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 2a5b2126b674..58a3597037b1 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -2,7 +2,7 @@ #!/bin/bash # Handles creation and destruction of IP-in-IP or GRE tunnels over the given -# topology. +# topology. Supports both flat and hierarchical models. # # Flat Model: # Overlay and underlay share the same VRF. @@ -63,6 +63,91 @@ # | 203.0.113.1/24 | # | 2001:db8:2::1/64 | # +--------------------------------+ +# +# Hierarchical model: +# The tunnel is bound to a device in a different VRF +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | +-----------------------|-----------------+ | +# | | $ol1 + | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | | | +# | | + g1a (ip6gre) | | +# | | loc=2001:db8:3::1 | | +# | | rem=2001:db8:3::2 | | +# | | tos=inherit | | +# | | ^ | | +# | | VRF v$ol1 | | | +# | +--------------------|--------------------+ | +# | | | +# | +--------------------|--------------------+ | +# | | VRF v$ul1 | | | +# | | | | | +# | | v | | +# | | dummy1 + | | +# | | 2001:db8:3::1/64 | | +# | | .-----------' | | +# | | | | | +# | | v | | +# | | + $ul1.111 (vlan) | | +# | | | 2001:db8:10::1/64 | | +# | | \ | | +# | | \__________ | | +# | | | | | +# | | + $ul1 | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|---------------------+ +# | SW2 | | +# | +---------------------|-------------------+ | +# | | + $ul2 | | +# | | _____| | | +# | | / | | +# | | / | | +# | | | $ul2.111 (vlan) | | +# | | + 2001:db8:10::2/64 | | +# | | ^ | | +# | | | | | +# | | '------. | | +# | | dummy2 + | | +# | | 2001:db8:3::2/64 | | +# | | ^ | | +# | | | | | +# | | | | | +# | | VRF v$ul2 | | | +# | +---------------------|-------------------+ | +# | | | +# | +---------------------|-------------------+ | +# | | VRF v$ol2 | | | +# | | | | | +# | | v | | +# | | g2a (ip6gre) + | | +# | | loc=2001:db8:3::2 | | +# | | rem=2001:db8:3::1 | | +# | | tos=inherit | | +# | | | | +# | | $ol2 + | | +# | | 203.0.113.2/24 | | | +# | | 2001:db8:2::2/64 | | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|--------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ source lib.sh source tc_common.sh @@ -172,6 +257,89 @@ sw2_flat_destroy() simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 } +sw1_hierarchical_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64 + simple_if_init $ul1 + ip link add name dummy1 type dummy + __simple_if_init dummy1 v$ul1 2001:db8:3::1/64 + + vlan_create $ul1 111 v$ul1 2001:db8:10::1/64 + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit dev dummy1 "$@" + ip link set dev g1a master v$ol1 + + ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add vrf v$ol1 203.0.113.0/24 dev g1a + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a +} + +sw1_hierarchical_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + ip -6 route del vrf v$ul1 2001:db8:3::2/128 + + tunnel_destroy g1a + vlan_destroy $ul1 111 + + __simple_if_fini dummy1 2001:db8:3::1/64 + ip link del dev dummy1 + + simple_if_fini $ul1 + simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_hierarchical_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + simple_if_init $ul2 + + ip link add name dummy2 type dummy + __simple_if_init dummy2 v$ul2 2001:db8:3::2/64 + + vlan_create $ul2 111 v$ul2 2001:db8:10::2/64 + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev dummy2 "$@" + ip link set dev g2a master v$ol2 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_hierarchical_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ul2 2001:db8:3::1/128 + + tunnel_destroy g2a + vlan_destroy $ul2 111 + + __simple_if_fini dummy2 2001:db8:3::2/64 + ip link del dev dummy2 + + simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + test_traffic_ip4ip6() { RET=0 From 4bb6cce00a2bef813a2457cd998f90ec9c30b6d0 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:38 +0300 Subject: [PATCH 039/440] selftests: mlxsw: devlink_trap_tunnel_ipip6: Add test case for IPv6 decap_error IPv6 underlay support was added, add test to check that "decap_error" trap is triggered under the right conditions and that devlink counters increase. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../spectrum-2/devlink_trap_tunnel_ipip6.sh | 250 ++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh new file mode 100755 index 000000000000..f62ce479c266 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 2001:db8:1::2/64 | +# | | +# | + g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 2001:db8:10::1/64 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:10::2/64 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:10::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:10::2/64 +} + +switch_create() +{ + ip link set dev $swp1 up + __addr_add_del $swp1 add 2001:db8:1::2/64 + tc qdisc add dev $swp1 clsact + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit + ip link set dev g1 up + __addr_add_del g1 add 2001:db8:3::1/128 + + ip link set dev $rp1 up + __addr_add_del $rp1 add 2001:db8:10::1/64 +} + +switch_destroy() +{ + __addr_add_del $rp1 del 2001:db8:10::1/64 + ip link set dev $rp1 down + + __addr_add_del g1 del 2001:db8:3::1/128 + ip link set dev g1 down + tunnel_destroy g1 + + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 2001:db8:1::2/64 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ipip_payload_get() +{ + local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"86:dd:"$( : ETH protocol type + )"$key"$( : Key + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + ) + echo $p +} + +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \ + action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \ + tos=$outer_tos,next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=2001:db8:3::2 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 2001:db8:4::2 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 2001:db8:3::1/128 + tunnel_destroy g1 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit key 233 + __addr_add_del g1 add 2001:db8:3::1/128 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS From c473f723f97a0b3a2d0b3ae75bfcf090882afa85 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:39 +0300 Subject: [PATCH 040/440] selftests: mlxsw: devlink_trap_tunnel_ipip: Align topology drawing correctly As part of adding same test for GRE tunnel with IPv6 underlay, wrong alignments were found, fix them. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index 8817851da7a9..e2ab26b790a0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -13,7 +13,7 @@ # | # +-------------------|-----+ # | SW1 | | -# | $swp1 + | +# | $swp1 + | # | 192.0.2.2/28 | # | | # | + g1a (gre) | @@ -27,8 +27,8 @@ # | # +--|----------------------+ # | | VRF2 | -# | + $rp2 | -# | 198.51.100.2/28 | +# | + $rp2 | +# | 198.51.100.2/28 | # +-------------------------+ lib_dir=$(dirname $0)/../../../net/forwarding From 8bb0ebd52238972fd4c60280669103560838ab0d Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:40 +0300 Subject: [PATCH 041/440] selftests: mlxsw: devlink_trap_tunnel_ipip: Remove code duplication As part of adding same test for GRE tunnel with IPv6 underlay, an optional improvement was found - call ipip_payload_get from ecn_payload_get, so do not duplicate the code which creates the payload. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/mlxsw/devlink_trap_tunnel_ipip.sh | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index e2ab26b790a0..c072c1633f1d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -116,12 +116,16 @@ cleanup() forwarding_restore } -ecn_payload_get() +ipip_payload_get() { + local flags=$1; shift + local key=$1; shift + p=$(: - )"0"$( : GRE flags + )"$flags"$( : GRE flags )"0:00:"$( : Reserved + version )"08:00:"$( : ETH protocol type + )"$key"$( : Key )"4"$( : IP version )"5:"$( : IHL )"00:"$( : IP TOS @@ -137,6 +141,11 @@ ecn_payload_get() echo $p } +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + ecn_decap_test() { local trap_name="decap_error" @@ -171,31 +180,6 @@ ecn_decap_test() tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } -ipip_payload_get() -{ - local flags=$1; shift - local key=$1; shift - - p=$(: - )"$flags"$( : GRE flags - )"0:00:"$( : Reserved + version - )"08:00:"$( : ETH protocol type - )"$key"$( : Key - )"4"$( : IP version - )"5:"$( : IHL - )"00:"$( : IP TOS - )"00:14:"$( : IP total length - )"00:00:"$( : IP identification - )"20:00:"$( : IP flags + frag off - )"30:"$( : IP TTL - )"01:"$( : IP proto - )"E7:E6:"$( : IP header csum - )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 - )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 - ) - echo $p -} - no_matching_tunnel_test() { local trap_name="decap_error" From 7f63cdde50302bcdf48c35afbb057a8d3d3e0d85 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 8 Oct 2021 16:12:41 +0300 Subject: [PATCH 042/440] selftests: mlxsw: devlink_trap_tunnel_ipip: Send a full-length key As part of adding same test for GRE tunnel with IPv6 underlay, missing bytes for key were found. mausezahn does not fill zeros between two colons, so send them explicitly. For example, use "00:00:00:E9:" instead of ":E9:" Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index c072c1633f1d..e9a82cae8c9a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -223,7 +223,8 @@ decap_error_test() no_matching_tunnel_test "Decap error: Source IP check failed" \ 192.0.2.68 "0" no_matching_tunnel_test \ - "Decap error: Key exists but was not expected" $sip "2" ":E9:" + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" # Destroy the tunnel and create new one with key __addr_add_del g1 del 192.0.2.65/32 @@ -235,7 +236,8 @@ decap_error_test() no_matching_tunnel_test \ "Decap error: Key does not exist but was expected" $sip "0" no_matching_tunnel_test \ - "Decap error: Packet has a wrong key field" $sip "2" "E8:" + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" } trap cleanup EXIT From acede3c5dad5b83c75c624514bd57171062b4c46 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 7 Oct 2021 18:30:42 +0300 Subject: [PATCH 043/440] net: enetc: declare NETIF_F_HW_CSUM and do it in software This is just a preparation patch for software TSO in the enetc driver. Unfortunately, ENETC does not support Tx checksum offload which would normally render TSO, even software, impossible. Declare NETIF_F_HW_CSUM as part of the feature set and do it at driver level using skb_csum_hwoffload_help() so that we can move forward and also add support for TSO in the next patch. Signed-off-by: Ioana Ciornei Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 +++++++- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 6 ++++-- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 6 ++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3cbfa8b4e265..17d8e04c10a8 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -319,7 +319,7 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_bdr *tx_ring; - int count; + int count, err; /* Queue one-step Sync packet if already locked */ if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { @@ -342,6 +342,12 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) + goto drop_packet_err; + } + enetc_lock_mdio(); count = enetc_map_tx_buffs(tx_ring, skb); enetc_unlock_mdio(); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 628a74ba630c..1e3b2e191562 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -759,10 +759,12 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK; + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK | + NETIF_F_HW_CSUM; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_CSUM; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 0704f6bf12fd..f2a0c0f9fe1d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -122,10 +122,12 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_CSUM; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_CSUM; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; From fb8629e2cbfce2b695521d6d33d029117ceda007 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 7 Oct 2021 18:30:43 +0300 Subject: [PATCH 044/440] net: enetc: add support for software TSO This patch adds support for driver level TSO in the enetc driver using the TSO API. Beside using the usual tso_build_hdr(), tso_build_data() this specific implementation also has to compute the checksum, both IP and L4, for each resulted segment. This is because the ENETC controller does not support Tx checksum offload which is needed in order to take advantage of TSO. With the workaround for the ENETC MDIO erratum in place the Tx path of the driver is forced to lock/unlock for each skb sent. This is why, even though we are computing the checksum by hand we see the following improvement in TCP termination on the LS1028A SoC, on a single A72 core running at 1.3GHz: before: 1.63 Gbits/sec after: 2.34 Gbits/sec Signed-off-by: Ioana Ciornei Reviewed-by: Claudiu Manoil Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 321 ++++++++++++++++-- drivers/net/ethernet/freescale/enetc/enetc.h | 4 + .../net/ethernet/freescale/enetc/enetc_pf.c | 6 +- .../net/ethernet/freescale/enetc/enetc_vf.c | 6 +- 4 files changed, 312 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 17d8e04c10a8..09193b478ab3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -8,6 +8,7 @@ #include #include #include +#include static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv) { @@ -314,6 +315,255 @@ dma_err: return 0; } +static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, + struct enetc_tx_swbd *tx_swbd, + union enetc_tx_bd *txbd, int *i, int hdr_len, + int data_len) +{ + union enetc_tx_bd txbd_tmp; + u8 flags = 0, e_flags = 0; + dma_addr_t addr; + + enetc_clear_tx_bd(&txbd_tmp); + addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE; + + if (skb_vlan_tag_present(skb)) + flags |= ENETC_TXBD_FLAGS_EX; + + txbd_tmp.addr = cpu_to_le64(addr); + txbd_tmp.buf_len = cpu_to_le16(hdr_len); + + /* first BD needs frm_len and offload flags set */ + txbd_tmp.frm_len = cpu_to_le16(hdr_len + data_len); + txbd_tmp.flags = flags; + + /* For the TSO header we do not set the dma address since we do not + * want it unmapped when we do cleanup. We still set len so that we + * count the bytes sent. + */ + tx_swbd->len = hdr_len; + tx_swbd->do_twostep_tstamp = false; + tx_swbd->check_wb = false; + + /* Actually write the header in the BD */ + *txbd = txbd_tmp; + + /* Add extension BD for VLAN */ + if (flags & ENETC_TXBD_FLAGS_EX) { + /* Get the next BD */ + enetc_bdr_idx_inc(tx_ring, i); + txbd = ENETC_TXBD(*tx_ring, *i); + tx_swbd = &tx_ring->tx_swbd[*i]; + prefetchw(txbd); + + /* Setup the VLAN fields */ + enetc_clear_tx_bd(&txbd_tmp); + txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb)); + txbd_tmp.ext.tpid = 0; /* < C-TAG */ + e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS; + + /* Write the BD */ + txbd_tmp.ext.e_flags = e_flags; + *txbd = txbd_tmp; + } +} + +static int enetc_map_tx_tso_data(struct enetc_bdr *tx_ring, struct sk_buff *skb, + struct enetc_tx_swbd *tx_swbd, + union enetc_tx_bd *txbd, char *data, + int size, bool last_bd) +{ + union enetc_tx_bd txbd_tmp; + dma_addr_t addr; + u8 flags = 0; + + enetc_clear_tx_bd(&txbd_tmp); + + addr = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, addr))) { + netdev_err(tx_ring->ndev, "DMA map error\n"); + return -ENOMEM; + } + + if (last_bd) { + flags |= ENETC_TXBD_FLAGS_F; + tx_swbd->is_eof = 1; + } + + txbd_tmp.addr = cpu_to_le64(addr); + txbd_tmp.buf_len = cpu_to_le16(size); + txbd_tmp.flags = flags; + + tx_swbd->dma = addr; + tx_swbd->len = size; + tx_swbd->dir = DMA_TO_DEVICE; + + *txbd = txbd_tmp; + + return 0; +} + +static __wsum enetc_tso_hdr_csum(struct tso_t *tso, struct sk_buff *skb, + char *hdr, int hdr_len, int *l4_hdr_len) +{ + char *l4_hdr = hdr + skb_transport_offset(skb); + int mac_hdr_len = skb_network_offset(skb); + + if (tso->tlen != sizeof(struct udphdr)) { + struct tcphdr *tcph = (struct tcphdr *)(l4_hdr); + + tcph->check = 0; + } else { + struct udphdr *udph = (struct udphdr *)(l4_hdr); + + udph->check = 0; + } + + /* Compute the IP checksum. This is necessary since tso_build_hdr() + * already incremented the IP ID field. + */ + if (!tso->ipv6) { + struct iphdr *iph = (void *)(hdr + mac_hdr_len); + + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + } + + /* Compute the checksum over the L4 header. */ + *l4_hdr_len = hdr_len - skb_transport_offset(skb); + return csum_partial(l4_hdr, *l4_hdr_len, 0); +} + +static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso, + struct sk_buff *skb, char *hdr, int len, + __wsum sum) +{ + char *l4_hdr = hdr + skb_transport_offset(skb); + __sum16 csum_final; + + /* Complete the L4 checksum by appending the pseudo-header to the + * already computed checksum. + */ + if (!tso->ipv6) + csum_final = csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + len, ip_hdr(skb)->protocol, sum); + else + csum_final = csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + len, ipv6_hdr(skb)->nexthdr, sum); + + if (tso->tlen != sizeof(struct udphdr)) { + struct tcphdr *tcph = (struct tcphdr *)(l4_hdr); + + tcph->check = csum_final; + } else { + struct udphdr *udph = (struct udphdr *)(l4_hdr); + + udph->check = csum_final; + } +} + +static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) +{ + int hdr_len, total_len, data_len; + struct enetc_tx_swbd *tx_swbd; + union enetc_tx_bd *txbd; + struct tso_t tso; + __wsum csum, csum2; + int count = 0, pos; + int err, i, bd_data_num; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + total_len = skb->len - hdr_len; + i = tx_ring->next_to_use; + + while (total_len > 0) { + char *hdr; + + /* Get the BD */ + txbd = ENETC_TXBD(*tx_ring, i); + tx_swbd = &tx_ring->tx_swbd[i]; + prefetchw(txbd); + + /* Determine the length of this packet */ + data_len = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_len; + + /* prepare packet headers: MAC + IP + TCP */ + hdr = tx_ring->tso_headers + i * TSO_HEADER_SIZE; + tso_build_hdr(skb, hdr, &tso, data_len, total_len == 0); + + /* compute the csum over the L4 header */ + csum = enetc_tso_hdr_csum(&tso, skb, hdr, hdr_len, &pos); + enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, &i, hdr_len, data_len); + bd_data_num = 0; + count++; + + while (data_len > 0) { + int size; + + size = min_t(int, tso.size, data_len); + + /* Advance the index in the BDR */ + enetc_bdr_idx_inc(tx_ring, &i); + txbd = ENETC_TXBD(*tx_ring, i); + tx_swbd = &tx_ring->tx_swbd[i]; + prefetchw(txbd); + + /* Compute the checksum over this segment of data and + * add it to the csum already computed (over the L4 + * header and possible other data segments). + */ + csum2 = csum_partial(tso.data, size, 0); + csum = csum_block_add(csum, csum2, pos); + pos += size; + + err = enetc_map_tx_tso_data(tx_ring, skb, tx_swbd, txbd, + tso.data, size, + size == data_len); + if (err) + goto err_map_data; + + data_len -= size; + count++; + bd_data_num++; + tso_build_data(skb, &tso, size); + + if (unlikely(bd_data_num >= ENETC_MAX_SKB_FRAGS && data_len)) + goto err_chained_bd; + } + + enetc_tso_complete_csum(tx_ring, &tso, skb, hdr, pos, csum); + + if (total_len == 0) + tx_swbd->skb = skb; + + /* Go to the next BD */ + enetc_bdr_idx_inc(tx_ring, &i); + } + + tx_ring->next_to_use = i; + enetc_update_tx_ring_tail(tx_ring); + + return count; + +err_map_data: + dev_err(tx_ring->dev, "DMA map error"); + +err_chained_bd: + do { + tx_swbd = &tx_ring->tx_swbd[i]; + enetc_free_tx_frame(tx_ring, tx_swbd); + if (i == 0) + i = tx_ring->bd_count; + i--; + } while (count--); + + return 0; +} + static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -332,26 +582,36 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, tx_ring = priv->tx_ring[skb->queue_mapping]; - if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) - if (unlikely(skb_linearize(skb))) - goto drop_packet_err; + if (skb_is_gso(skb)) { + if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } - count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */ - if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) { - netif_stop_subqueue(ndev, tx_ring->index); - return NETDEV_TX_BUSY; + enetc_lock_mdio(); + count = enetc_map_tx_tso_buffs(tx_ring, skb); + enetc_unlock_mdio(); + } else { + if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) + if (unlikely(skb_linearize(skb))) + goto drop_packet_err; + + count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */ + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) + goto drop_packet_err; + } + enetc_lock_mdio(); + count = enetc_map_tx_buffs(tx_ring, skb); + enetc_unlock_mdio(); } - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto drop_packet_err; - } - - enetc_lock_mdio(); - count = enetc_map_tx_buffs(tx_ring, skb); - enetc_unlock_mdio(); - if (unlikely(!count)) goto drop_packet_err; @@ -1499,15 +1759,30 @@ static int enetc_alloc_txbdr(struct enetc_bdr *txr) return -ENOMEM; err = enetc_dma_alloc_bdr(txr, sizeof(union enetc_tx_bd)); - if (err) { - vfree(txr->tx_swbd); - return err; - } + if (err) + goto err_alloc_bdr; + + txr->tso_headers = dma_alloc_coherent(txr->dev, + txr->bd_count * TSO_HEADER_SIZE, + &txr->tso_headers_dma, + GFP_KERNEL); + if (err) + goto err_alloc_tso; txr->next_to_clean = 0; txr->next_to_use = 0; return 0; + +err_alloc_tso: + dma_free_coherent(txr->dev, txr->bd_count * sizeof(union enetc_tx_bd), + txr->bd_base, txr->bd_dma_base); + txr->bd_base = NULL; +err_alloc_bdr: + vfree(txr->tx_swbd); + txr->tx_swbd = NULL; + + return err; } static void enetc_free_txbdr(struct enetc_bdr *txr) @@ -1519,6 +1794,10 @@ static void enetc_free_txbdr(struct enetc_bdr *txr) size = txr->bd_count * sizeof(union enetc_tx_bd); + dma_free_coherent(txr->dev, txr->bd_count * TSO_HEADER_SIZE, + txr->tso_headers, txr->tso_headers_dma); + txr->tso_headers = NULL; + dma_free_coherent(txr->dev, size, txr->bd_base, txr->bd_dma_base); txr->bd_base = NULL; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 08b283347d9c..fb39e406b7fc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -112,6 +112,10 @@ struct enetc_bdr { dma_addr_t bd_dma_base; u8 tsd_enable; /* Time specific departure */ bool ext_en; /* enable h/w descriptor extensions */ + + /* DMA buffer for TSO headers */ + char *tso_headers; + dma_addr_t tso_headers_dma; } ____cacheline_aligned_in_smp; static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 1e3b2e191562..8281dd664f4e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -760,11 +760,13 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index f2a0c0f9fe1d..df312c9f8243 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -123,11 +123,13 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; From 75ea27d0d62281c31ee259c872dfdeb072cf5e39 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 7 Oct 2021 18:16:50 +0200 Subject: [PATCH 045/440] net: introduce a function to check if a netdev name is in use __dev_get_by_name is currently used to either retrieve a net device reference using its name or to check if a name is already used by a registered net device (per ns). In the later case there is no need to return a reference to a net device. Introduce a new helper, netdev_name_in_use, to check if a name is currently used by a registered net device without leaking a reference the corresponding net device. This helper uses netdev_name_node_lookup instead of __dev_get_by_name as we don't need the extra logic retrieving a reference to the corresponding net device. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d79163208dfd..15f4a658e436 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2955,6 +2955,7 @@ struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); +bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void dev_close(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 16ab09b6a7f8..1594cd2955ba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, return NULL; } +bool netdev_name_in_use(struct net *net, const char *name) +{ + return netdev_name_node_lookup(net, name); +} +EXPORT_SYMBOL(netdev_name_in_use); + int netdev_name_node_alt_create(struct net_device *dev, const char *name) { struct netdev_name_node *name_node; @@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } snprintf(buf, IFNAMSIZ, name, i); - if (!__dev_get_by_name(net, buf)) + if (!netdev_name_in_use(net, buf)) return i; /* It is possible to run out of possible slots @@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, if (strchr(name, '%')) return dev_alloc_name_ns(net, dev, name); - else if (__dev_get_by_name(net, name)) + else if (netdev_name_in_use(net, name)) return -EEXIST; else if (dev->name != name) strlcpy(dev->name, name, IFNAMSIZ); @@ -11153,7 +11159,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, * we can use it in the destination network namespace. */ err = -EEXIST; - if (__dev_get_by_name(net, dev->name)) { + if (netdev_name_in_use(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; @@ -11506,7 +11512,7 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); - if (__dev_get_by_name(&init_net, fb_name)) + if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { From caa9b35fadff57e6ee7499f2fb26f8abd83a133b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 7 Oct 2021 18:16:51 +0200 Subject: [PATCH 046/440] bonding: use the correct function to check for netdev name collision A new helper to detect if a net device name is in use was added. Use it here as the return reference from __dev_get_by_name was discarded. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index b9e9842fed94..c48b77167fab 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -811,8 +811,8 @@ int bond_create_sysfs(struct bond_net *bn) */ if (ret == -EEXIST) { /* Is someone being kinky and naming a device bonding_master? */ - if (__dev_get_by_name(bn->net, - class_attr_bonding_masters.attr.name)) + if (netdev_name_in_use(bn->net, + class_attr_bonding_masters.attr.name)) pr_err("network device named %s already exists in sysfs\n", class_attr_bonding_masters.attr.name); ret = 0; From d03eb9787d3aa74da0379cd34345686227580000 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 7 Oct 2021 18:16:52 +0200 Subject: [PATCH 047/440] ppp: use the correct function to check if a netdev name is in use A new helper to detect if a net device name is in use was added. Use it here as the return reference from __dev_get_by_name was discarded. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fb52cd175b45..1180a0e2445f 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1161,7 +1161,7 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) if (!ifname_is_set) { while (1) { snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ret); - if (!__dev_get_by_name(ppp->ppp_net, ppp->dev->name)) + if (!netdev_name_in_use(ppp->ppp_net, ppp->dev->name)) break; unit_put(&pn->units_idr, ret); ret = unit_get(&pn->units_idr, ppp, ret + 1); From 0316c7e66bbd16cf2d01a4e2f5afa6afb01278f2 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 7 Oct 2021 18:45:35 +0200 Subject: [PATCH 048/440] net: phy: micrel: ksz9131 led errata workaround Micrel KSZ9131 PHY LED behavior is not correct when configured in Individual Mode, LED1 (Activity LED) is in the ON state when there is no-link. Workaround this by setting bit 9 of register 0x1e after verifying that the LED configuration is Individual Mode. This issue is described in KSZ9131RNX Silicon Errata DS80000693B [*] and according to that it will not be corrected in a future silicon revision. [*] https://ww1.microchip.com/downloads/en/DeviceDoc/KSZ9131RNX-Silicon-Errata-and-Data-Sheet-Clarification-80000863B.pdf Signed-off-by: Francesco Dolcini Reviewed-by: Philippe Schenker Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index c330a5a9f665..b70f62efdbc3 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1003,6 +1003,26 @@ static int ksz9131_config_rgmii_delay(struct phy_device *phydev) txcdll_val); } +/* Silicon Errata DS80000693B + * + * When LEDs are configured in Individual Mode, LED1 is ON in a no-link + * condition. Workaround is to set register 0x1e, bit 9, this way LED1 behaves + * according to the datasheet (off if there is no link). + */ +static int ksz9131_led_errata(struct phy_device *phydev) +{ + int reg; + + reg = phy_read_mmd(phydev, 2, 0); + if (reg < 0) + return reg; + + if (!(reg & BIT(4))) + return 0; + + return phy_set_bits(phydev, 0x1e, BIT(9)); +} + static int ksz9131_config_init(struct phy_device *phydev) { struct device_node *of_node; @@ -1058,6 +1078,10 @@ static int ksz9131_config_init(struct phy_device *phydev) if (ret < 0) return ret; + ret = ksz9131_led_errata(phydev); + if (ret < 0) + return ret; + return 0; } From e761523d0b407518d4812ada39957bd11227e95b Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 7 Oct 2021 18:52:38 +0300 Subject: [PATCH 049/440] qed: Fix compilation for CONFIG_QED_SRIOV undefined scenario MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes below compliation error in case CONFIG_QED_SRIOV not defined. drivers/net/ethernet/qlogic/qed/qed_dev.c: In function ‘qed_fw_err_handler’: drivers/net/ethernet/qlogic/qed/qed_dev.c:2390:3: error: implicit declaration of function ‘qed_sriov_vfpf_malicious’; did you mean ‘qed_iov_vf_task’? [-Werror=implicit-function-declaration] qed_sriov_vfpf_malicious(p_hwfn, &data->err_data); ^~~~~~~~~~~~~~~~~~~~~~~~ qed_iov_vf_task drivers/net/ethernet/qlogic/qed/qed_dev.c: In function ‘qed_common_eqe_event’: drivers/net/ethernet/qlogic/qed/qed_dev.c:2410:10: error: implicit declaration of function ‘qed_sriov_eqe_event’; did you mean ‘qed_common_eqe_event’? [-Werror=implicit-function-declaration] return qed_sriov_eqe_event(p_hwfn, opcode, echo, data, ^~~~~~~~~~~~~~~~~~~ qed_common_eqe_event Fixes: fe40a830dcde ("qed: Update qed_hsi.h for fw 8.59.1.0") Reported-by: Linux Kernel Functional Testing Cc: Naresh Kamboju Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: Omkar Kulkarni Signed-off-by: Prabhakar Kushwaha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index 1edf9c44dc67..f448e3dd6c8b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -478,6 +478,18 @@ static inline int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) static inline void qed_inform_vf_link_state(struct qed_hwfn *hwfn) { } + +static inline void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn, + struct fw_err_data *p_data) +{ +} + +static inline int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, + __le16 echo, union event_ring_data *data, + u8 fw_return_code) +{ + return 0; +} #endif #define qed_for_each_vf(_p_hwfn, _i) \ From 6ed3f61e32007298a0dac406ad6d4e4b30cb3d54 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Fri, 8 Oct 2021 00:31:47 -0600 Subject: [PATCH 050/440] net: tg3: fix redundant check of true expression Remove the redundant check of (tg3_asic_rev(tp) == ASIC_REV_5705) after it is checked to be true. Signed-off-by: Jean Sacren Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20211008063147.1421-1-sakiwit@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5a50ea75094f..32d486114a6d 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10273,8 +10273,7 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) if (tg3_asic_rev(tp) == ASIC_REV_5705 && tg3_chip_rev_id(tp) != CHIPREV_ID_5705_A0) { - if (tg3_flag(tp, TSO_CAPABLE) && - tg3_asic_rev(tp) == ASIC_REV_5705) { + if (tg3_flag(tp, TSO_CAPABLE)) { rdmac_mode |= RDMAC_MODE_FIFO_SIZE_128; } else if (!(tr32(TG3PCI_PCISTATE) & PCISTATE_BUS_SPEED_HIGH) && !tg3_flag(tp, IS_5788)) { From e506342a03c777a8d56389ff2764617648562bcf Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 8 Oct 2021 17:17:45 +0800 Subject: [PATCH 051/440] selftests/tls: add SM4 GCM/CCM to tls selftests Add new cipher as a variant of standard tls selftests. Signed-off-by: Tianjia Zhang Link: https://lore.kernel.org/r/20211008091745.42917-1-tianjia.zhang@linux.alibaba.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 97fceb9be9ed..d3047e251fe9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -29,6 +29,8 @@ struct tls_crypto_info_keys { union { struct tls12_crypto_info_aes_gcm_128 aes128; struct tls12_crypto_info_chacha20_poly1305 chacha20; + struct tls12_crypto_info_sm4_gcm sm4gcm; + struct tls12_crypto_info_sm4_ccm sm4ccm; }; size_t len; }; @@ -49,6 +51,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->aes128.info.version = tls_version; tls12->aes128.info.cipher_type = cipher_type; break; + case TLS_CIPHER_SM4_GCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm); + tls12->sm4gcm.info.version = tls_version; + tls12->sm4gcm.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_SM4_CCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm); + tls12->sm4ccm.info.version = tls_version; + tls12->sm4ccm.info.cipher_type = cipher_type; + break; default: break; } @@ -148,13 +160,13 @@ FIXTURE_VARIANT(tls) uint16_t cipher_type; }; -FIXTURE_VARIANT_ADD(tls, 12_gcm) +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) { .tls_version = TLS_1_2_VERSION, .cipher_type = TLS_CIPHER_AES_GCM_128, }; -FIXTURE_VARIANT_ADD(tls, 13_gcm) +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm) { .tls_version = TLS_1_3_VERSION, .cipher_type = TLS_CIPHER_AES_GCM_128, @@ -172,6 +184,18 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, }; +FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_GCM, +}; + +FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_CCM, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; From f12e658c620a925aba4caead54a05eb157728863 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 8 Oct 2021 16:23:15 +0300 Subject: [PATCH 052/440] mlxsw: item: Annotate item helpers with '__maybe_unused' mlxsw is using helpers to get / set fields in messages exchanged with the device. It is possible that some fields are only set or only get. This causes LLVM to emit warnings such as the following when building with W=1 [1]: drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c:2022:1: warning: unused function 'mlxsw_afa_sampler_mirror_agent_get' The fact that some fields are only set or only get is very much intentional and not indicative of functions that need to be removed. Therefore, annotate the item helpers with '__maybe_unused' to suppress these warnings. [1] https://lkml.org/lkml/2021/9/29/685 Cc: Nathan Chancellor Reported-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20211008132315.90211-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/item.h | 56 ++++++++++++---------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index e92cadc98128..ab70a873a01a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -270,11 +270,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u8 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +static inline u8 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u8 val)\ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u8 val) \ { \ __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ } @@ -290,13 +292,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u8 \ +static inline u8 __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ u8 val) \ { \ @@ -311,11 +313,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +static inline u16 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 val)\ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 val) \ { \ __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ } @@ -331,13 +335,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u16 \ +static inline u16 __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ u16 val) \ { \ @@ -352,11 +356,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +static inline u32 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u32 val)\ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u32 val) \ { \ __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ } @@ -372,13 +378,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u32 \ +static inline u32 __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ u32 val) \ { \ @@ -393,11 +399,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +static inline u64 __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ { \ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u64 val)\ +static inline void __maybe_unused \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u64 val) \ { \ __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ } @@ -413,13 +421,13 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bits = _sizebits,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u64 \ +static inline u64 __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ { \ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), \ index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ u64 val) \ { \ @@ -433,19 +441,19 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bytes = _sizebytes,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, char *dst) \ { \ __mlxsw_item_memcpy_from(buf, dst, \ &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, const char *src) \ { \ __mlxsw_item_memcpy_to(buf, src, \ &__ITEM_NAME(_type, _cname, _iname), 0); \ } \ -static inline char * \ +static inline char * __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_data(char *buf) \ { \ return __mlxsw_item_data(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ @@ -460,7 +468,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bytes = _sizebytes,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, \ unsigned short index, \ char *dst) \ @@ -468,7 +476,7 @@ mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, \ __mlxsw_item_memcpy_from(buf, dst, \ &__ITEM_NAME(_type, _cname, _iname), index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \ unsigned short index, \ const char *src) \ @@ -476,7 +484,7 @@ mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \ __mlxsw_item_memcpy_to(buf, src, \ &__ITEM_NAME(_type, _cname, _iname), index); \ } \ -static inline char * \ +static inline char * __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_data(char *buf, unsigned short index) \ { \ return __mlxsw_item_data(buf, \ @@ -491,14 +499,14 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .size = {.bytes = _sizebytes,}, \ .name = #_type "_" #_cname "_" #_iname, \ }; \ -static inline u8 \ +static inline u8 __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, u16 index) \ { \ return __mlxsw_item_bit_array_get(buf, \ &__ITEM_NAME(_type, _cname, _iname), \ index); \ } \ -static inline void \ +static inline void __maybe_unused \ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 index, u8 val) \ { \ return __mlxsw_item_bit_array_set(buf, \ From 2b37367065c7d863c087e6ee4b5bf4d97019265e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:59:09 -0700 Subject: [PATCH 053/440] ethernet: forcedeth: remove direct netdev->dev_addr writes forcedeth writes to dev_addr byte by byte, make it use a local buffer instead. Commit the changes with eth_hw_addr_set() at the end. Signed-off-by: Jakub Kicinski Reviewed-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 49 +++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 3f269f914dac..9b530d7509a4 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5711,6 +5711,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) u32 phystate_orig = 0, phystate; int phyinitialized = 0; static int printed_version; + u8 mac[ETH_ALEN]; if (!printed_version++) pr_info("Reverse Engineered nForce ethernet driver. Version %s.\n", @@ -5884,50 +5885,52 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) txreg = readl(base + NvRegTransmitPoll); if (id->driver_data & DEV_HAS_CORRECT_MACADDR) { /* mac address is already in correct order */ - dev->dev_addr[0] = (np->orig_mac[0] >> 0) & 0xff; - dev->dev_addr[1] = (np->orig_mac[0] >> 8) & 0xff; - dev->dev_addr[2] = (np->orig_mac[0] >> 16) & 0xff; - dev->dev_addr[3] = (np->orig_mac[0] >> 24) & 0xff; - dev->dev_addr[4] = (np->orig_mac[1] >> 0) & 0xff; - dev->dev_addr[5] = (np->orig_mac[1] >> 8) & 0xff; + mac[0] = (np->orig_mac[0] >> 0) & 0xff; + mac[1] = (np->orig_mac[0] >> 8) & 0xff; + mac[2] = (np->orig_mac[0] >> 16) & 0xff; + mac[3] = (np->orig_mac[0] >> 24) & 0xff; + mac[4] = (np->orig_mac[1] >> 0) & 0xff; + mac[5] = (np->orig_mac[1] >> 8) & 0xff; } else if (txreg & NVREG_TRANSMITPOLL_MAC_ADDR_REV) { /* mac address is already in correct order */ - dev->dev_addr[0] = (np->orig_mac[0] >> 0) & 0xff; - dev->dev_addr[1] = (np->orig_mac[0] >> 8) & 0xff; - dev->dev_addr[2] = (np->orig_mac[0] >> 16) & 0xff; - dev->dev_addr[3] = (np->orig_mac[0] >> 24) & 0xff; - dev->dev_addr[4] = (np->orig_mac[1] >> 0) & 0xff; - dev->dev_addr[5] = (np->orig_mac[1] >> 8) & 0xff; + mac[0] = (np->orig_mac[0] >> 0) & 0xff; + mac[1] = (np->orig_mac[0] >> 8) & 0xff; + mac[2] = (np->orig_mac[0] >> 16) & 0xff; + mac[3] = (np->orig_mac[0] >> 24) & 0xff; + mac[4] = (np->orig_mac[1] >> 0) & 0xff; + mac[5] = (np->orig_mac[1] >> 8) & 0xff; /* * Set orig mac address back to the reversed version. * This flag will be cleared during low power transition. * Therefore, we should always put back the reversed address. */ - np->orig_mac[0] = (dev->dev_addr[5] << 0) + (dev->dev_addr[4] << 8) + - (dev->dev_addr[3] << 16) + (dev->dev_addr[2] << 24); - np->orig_mac[1] = (dev->dev_addr[1] << 0) + (dev->dev_addr[0] << 8); + np->orig_mac[0] = (mac[5] << 0) + (mac[4] << 8) + + (mac[3] << 16) + (mac[2] << 24); + np->orig_mac[1] = (mac[1] << 0) + (mac[0] << 8); } else { /* need to reverse mac address to correct order */ - dev->dev_addr[0] = (np->orig_mac[1] >> 8) & 0xff; - dev->dev_addr[1] = (np->orig_mac[1] >> 0) & 0xff; - dev->dev_addr[2] = (np->orig_mac[0] >> 24) & 0xff; - dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff; - dev->dev_addr[4] = (np->orig_mac[0] >> 8) & 0xff; - dev->dev_addr[5] = (np->orig_mac[0] >> 0) & 0xff; + mac[0] = (np->orig_mac[1] >> 8) & 0xff; + mac[1] = (np->orig_mac[1] >> 0) & 0xff; + mac[2] = (np->orig_mac[0] >> 24) & 0xff; + mac[3] = (np->orig_mac[0] >> 16) & 0xff; + mac[4] = (np->orig_mac[0] >> 8) & 0xff; + mac[5] = (np->orig_mac[0] >> 0) & 0xff; writel(txreg|NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll); dev_dbg(&pci_dev->dev, "%s: set workaround bit for reversed mac addr\n", __func__); } - if (!is_valid_ether_addr(dev->dev_addr)) { + if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(dev, mac); + } else { /* * Bad mac address. At least one bios sets the mac address * to 01:23:45:67:89:ab */ dev_err(&pci_dev->dev, "Invalid MAC address detected: %pM - Please complain to your hardware vendor.\n", - dev->dev_addr); + mac); eth_hw_addr_random(dev); dev_err(&pci_dev->dev, "Using random MAC address: %pM\n", dev->dev_addr); From a04436b27a93421634455455174e917dd63887c1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:59:10 -0700 Subject: [PATCH 054/440] ethernet: tg3: remove direct netdev->dev_addr writes tg3 does various forms of direct writes to netdev->dev_addr. Use a local buffer. Make sure local buffer is aligned since eth_platform_get_mac_address() may call ether_addr_copy(). tg3_get_device_address() returns whenever it finds a method that found a valid address. Instead of modifying all the exit points pass the buffer from the outside and commit the address in the caller. Constify the argument of the set addr helper. Signed-off-by: Jakub Kicinski Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 48 +++++++++++++++-------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 32d486114a6d..e9518b98914b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -3942,7 +3942,8 @@ static int tg3_load_tso_firmware(struct tg3 *tp) } /* tp->lock is held. */ -static void __tg3_set_one_mac_addr(struct tg3 *tp, u8 *mac_addr, int index) +static void __tg3_set_one_mac_addr(struct tg3 *tp, const u8 *mac_addr, + int index) { u32 addr_high, addr_low; @@ -16910,19 +16911,18 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) return err; } -static int tg3_get_device_address(struct tg3 *tp) +static int tg3_get_device_address(struct tg3 *tp, u8 *addr) { - struct net_device *dev = tp->dev; u32 hi, lo, mac_offset; int addr_ok = 0; int err; - if (!eth_platform_get_mac_address(&tp->pdev->dev, dev->dev_addr)) + if (!eth_platform_get_mac_address(&tp->pdev->dev, addr)) return 0; if (tg3_flag(tp, IS_SSB_CORE)) { - err = ssb_gige_get_macaddr(tp->pdev, &dev->dev_addr[0]); - if (!err && is_valid_ether_addr(&dev->dev_addr[0])) + err = ssb_gige_get_macaddr(tp->pdev, addr); + if (!err && is_valid_ether_addr(addr)) return 0; } @@ -16946,41 +16946,41 @@ static int tg3_get_device_address(struct tg3 *tp) /* First try to get it from MAC address mailbox. */ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi); if ((hi >> 16) == 0x484b) { - dev->dev_addr[0] = (hi >> 8) & 0xff; - dev->dev_addr[1] = (hi >> 0) & 0xff; + addr[0] = (hi >> 8) & 0xff; + addr[1] = (hi >> 0) & 0xff; tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_LOW_MBOX, &lo); - dev->dev_addr[2] = (lo >> 24) & 0xff; - dev->dev_addr[3] = (lo >> 16) & 0xff; - dev->dev_addr[4] = (lo >> 8) & 0xff; - dev->dev_addr[5] = (lo >> 0) & 0xff; + addr[2] = (lo >> 24) & 0xff; + addr[3] = (lo >> 16) & 0xff; + addr[4] = (lo >> 8) & 0xff; + addr[5] = (lo >> 0) & 0xff; /* Some old bootcode may report a 0 MAC address in SRAM */ - addr_ok = is_valid_ether_addr(&dev->dev_addr[0]); + addr_ok = is_valid_ether_addr(addr); } if (!addr_ok) { /* Next, try NVRAM. */ if (!tg3_flag(tp, NO_NVRAM) && !tg3_nvram_read_be32(tp, mac_offset + 0, &hi) && !tg3_nvram_read_be32(tp, mac_offset + 4, &lo)) { - memcpy(&dev->dev_addr[0], ((char *)&hi) + 2, 2); - memcpy(&dev->dev_addr[2], (char *)&lo, sizeof(lo)); + memcpy(&addr[0], ((char *)&hi) + 2, 2); + memcpy(&addr[2], (char *)&lo, sizeof(lo)); } /* Finally just fetch it out of the MAC control regs. */ else { hi = tr32(MAC_ADDR_0_HIGH); lo = tr32(MAC_ADDR_0_LOW); - dev->dev_addr[5] = lo & 0xff; - dev->dev_addr[4] = (lo >> 8) & 0xff; - dev->dev_addr[3] = (lo >> 16) & 0xff; - dev->dev_addr[2] = (lo >> 24) & 0xff; - dev->dev_addr[1] = hi & 0xff; - dev->dev_addr[0] = (hi >> 8) & 0xff; + addr[5] = lo & 0xff; + addr[4] = (lo >> 8) & 0xff; + addr[3] = (lo >> 16) & 0xff; + addr[2] = (lo >> 24) & 0xff; + addr[1] = hi & 0xff; + addr[0] = (hi >> 8) & 0xff; } } - if (!is_valid_ether_addr(&dev->dev_addr[0])) + if (!is_valid_ether_addr(addr)) return -EINVAL; return 0; } @@ -17556,6 +17556,7 @@ static int tg3_init_one(struct pci_dev *pdev, char str[40]; u64 dma_mask, persist_dma_mask; netdev_features_t features = 0; + u8 addr[ETH_ALEN] __aligned(2); err = pci_enable_device(pdev); if (err) { @@ -17778,12 +17779,13 @@ static int tg3_init_one(struct pci_dev *pdev, tp->rx_pending = 63; } - err = tg3_get_device_address(tp); + err = tg3_get_device_address(tp, addr); if (err) { dev_err(&pdev->dev, "Could not obtain valid ethernet address, aborting\n"); goto err_out_apeunmap; } + eth_hw_addr_set(dev, addr); intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW; rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW; From ca87931755648655e4b57806deefe5c5d38024de Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:59:11 -0700 Subject: [PATCH 055/440] ethernet: tulip: remove direct netdev->dev_addr writes Consify the casts of netdev->dev_addr. Convert pointless to eth_hw_addr_set() where possible. Use local buffers in a number of places. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 15 +++++---- drivers/net/ethernet/dec/tulip/de4x5.c | 35 ++++++++++--------- drivers/net/ethernet/dec/tulip/dmfe.c | 9 +++-- drivers/net/ethernet/dec/tulip/tulip_core.c | 37 ++++++++++++--------- drivers/net/ethernet/dec/tulip/uli526x.c | 11 +++--- drivers/net/ethernet/dec/tulip/xircom_cb.c | 4 ++- 6 files changed, 61 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 117c26fa5909..1e3c90c3c0ed 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -666,8 +666,8 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev) struct de_private *de = netdev_priv(dev); u16 hash_table[32]; struct netdev_hw_addr *ha; + const u16 *eaddrs; int i; - u16 *eaddrs; memset(hash_table, 0, sizeof(hash_table)); __set_bit_le(255, hash_table); /* Broadcast entry */ @@ -685,7 +685,7 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev) setup_frm = &de->setup_frame[13*6]; /* Fill the final entry with our physical address. */ - eaddrs = (u16 *)dev->dev_addr; + eaddrs = (const u16 *)dev->dev_addr; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; @@ -695,7 +695,7 @@ static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev) { struct de_private *de = netdev_priv(dev); struct netdev_hw_addr *ha; - u16 *eaddrs; + const u16 *eaddrs; /* We have <= 14 addresses so we can use the wonderful 16 address perfect filtering of the Tulip. */ @@ -710,7 +710,7 @@ static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev) setup_frm = &de->setup_frame[15*6]; /* Fill the final entry with our physical address. */ - eaddrs = (u16 *)dev->dev_addr; + eaddrs = (const u16 *)dev->dev_addr; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; @@ -1713,6 +1713,7 @@ static const struct ethtool_ops de_ethtool_ops = { static void de21040_get_mac_address(struct de_private *de) { + u8 addr[ETH_ALEN]; unsigned i; dw32 (ROMCmd, 0); /* Reset the pointer with a dummy write. */ @@ -1724,12 +1725,13 @@ static void de21040_get_mac_address(struct de_private *de) value = dr32(ROMCmd); rmb(); } while (value < 0 && --boguscnt > 0); - de->dev->dev_addr[i] = value; + addr[i] = value; udelay(1); if (boguscnt <= 0) pr_warn("timeout reading 21040 MAC address byte %u\n", i); } + eth_hw_addr_set(de->dev, addr); } static void de21040_get_media_info(struct de_private *de) @@ -1821,8 +1823,7 @@ static void de21041_get_srom_info(struct de_private *de) #endif /* store MAC address */ - for (i = 0; i < 6; i ++) - de->dev->dev_addr[i] = ee_data[i + sa_offset]; + eth_hw_addr_set(de->dev, &ee_data[i + sa_offset]); /* get offset of controller 0 info leaf. ignore 2nd byte. */ ofs = ee_data[SROMC0InfoLeaf]; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 36ab4cbf2ad0..13121c4dcfe6 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -4031,6 +4031,7 @@ get_hw_addr(struct net_device *dev) int broken, i, k, tmp, status = 0; u_short j,chksum; struct de4x5_private *lp = netdev_priv(dev); + u8 addr[ETH_ALEN]; broken = de4x5_bad_srom(lp); @@ -4042,28 +4043,30 @@ get_hw_addr(struct net_device *dev) if (lp->chipset == DC21040) { while ((tmp = inl(DE4X5_APROM)) < 0); k += (u_char) tmp; - dev->dev_addr[i++] = (u_char) tmp; + addr[i++] = (u_char) tmp; while ((tmp = inl(DE4X5_APROM)) < 0); k += (u_short) (tmp << 8); - dev->dev_addr[i++] = (u_char) tmp; + addr[i++] = (u_char) tmp; } else if (!broken) { - dev->dev_addr[i] = (u_char) lp->srom.ieee_addr[i]; i++; - dev->dev_addr[i] = (u_char) lp->srom.ieee_addr[i]; i++; + addr[i] = (u_char) lp->srom.ieee_addr[i]; i++; + addr[i] = (u_char) lp->srom.ieee_addr[i]; i++; } else if ((broken == SMC) || (broken == ACCTON)) { - dev->dev_addr[i] = *((u_char *)&lp->srom + i); i++; - dev->dev_addr[i] = *((u_char *)&lp->srom + i); i++; + addr[i] = *((u_char *)&lp->srom + i); i++; + addr[i] = *((u_char *)&lp->srom + i); i++; } } else { k += (u_char) (tmp = inb(EISA_APROM)); - dev->dev_addr[i++] = (u_char) tmp; + addr[i++] = (u_char) tmp; k += (u_short) ((tmp = inb(EISA_APROM)) << 8); - dev->dev_addr[i++] = (u_char) tmp; + addr[i++] = (u_char) tmp; } if (k > 0xffff) k-=0xffff; } if (k == 0xffff) k=0; + eth_hw_addr_set(dev, addr); + if (lp->bus == PCI) { if (lp->chipset == DC21040) { while ((tmp = inl(DE4X5_APROM)) < 0); @@ -4095,8 +4098,9 @@ get_hw_addr(struct net_device *dev) int x = dev->dev_addr[i]; x = ((x & 0xf) << 4) + ((x & 0xf0) >> 4); x = ((x & 0x33) << 2) + ((x & 0xcc) >> 2); - dev->dev_addr[i] = ((x & 0x55) << 1) + ((x & 0xaa) >> 1); + addr[i] = ((x & 0x55) << 1) + ((x & 0xaa) >> 1); } + eth_hw_addr_set(dev, addr); } #endif /* CONFIG_PPC_PMAC */ @@ -4158,12 +4162,9 @@ test_bad_enet(struct net_device *dev, int status) if ((tmp == 0) || (tmp == 0x5fa)) { if ((lp->chipset == last.chipset) && (lp->bus_num == last.bus) && (lp->bus_num > 0)) { - for (i=0; idev_addr[i] = last.addr[i]; - for (i=ETH_ALEN-1; i>2; --i) { - dev->dev_addr[i] += 1; - if (dev->dev_addr[i] != 0) break; - } - for (i=0; idev_addr[i]; + eth_addr_inc(last.addr); + eth_hw_addr_set(dev, last.addr); + if (!an_exception(lp)) { dev->irq = last.irq; } @@ -5391,9 +5392,7 @@ de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data if (netif_queue_stopped(dev)) return -EBUSY; netif_stop_queue(dev); - for (i=0; idev_addr[i] = tmp.addr[i]; - } + eth_hw_addr_set(dev, tmp.addr); build_setup_frame(dev, PHYS_ADDR_ONLY); /* Set up the descriptor and give ownership to the card */ load_packet(dev, lp->setup_frame, TD_IC | PERFECT_F | TD_SET | diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index c763b692e164..6e64ff20a378 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -476,8 +476,7 @@ static int dmfe_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Set Node address */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = db->srom[20 + i]; + eth_hw_addr_set(dev, &db->srom[20 + i]); err = register_netdev (dev); if (err) @@ -1436,9 +1435,9 @@ static void update_cr6(u32 cr6_data, void __iomem *ioaddr) static void dm9132_id_table(struct net_device *dev) { + const u16 *addrptr = (const u16 *)dev->dev_addr; struct dmfe_board_info *db = netdev_priv(dev); void __iomem *ioaddr = db->ioaddr + 0xc0; - u16 *addrptr = (u16 *)dev->dev_addr; struct netdev_hw_addr *ha; u16 i, hash_table[4]; @@ -1477,7 +1476,7 @@ static void send_filter_frame(struct net_device *dev) struct dmfe_board_info *db = netdev_priv(dev); struct netdev_hw_addr *ha; struct tx_desc *txptr; - u16 * addrptr; + const u16 * addrptr; u32 * suptr; int i; @@ -1487,7 +1486,7 @@ static void send_filter_frame(struct net_device *dev) suptr = (u32 *) txptr->tx_buf_ptr; /* Node address */ - addrptr = (u16 *) dev->dev_addr; + addrptr = (const u16 *) dev->dev_addr; *suptr++ = addrptr[0]; *suptr++ = addrptr[1]; *suptr++ = addrptr[2]; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 1ba6452c0683..ec0ce8f1beea 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -339,7 +339,7 @@ static void tulip_up(struct net_device *dev) } } else { /* This is set_rx_mode(), but without starting the transmitter. */ - u16 *eaddrs = (u16 *)dev->dev_addr; + const u16 *eaddrs = (const u16 *)dev->dev_addr; u16 *setup_frm = &tp->setup_frame[15*6]; dma_addr_t mapping; @@ -1001,8 +1001,8 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev) struct tulip_private *tp = netdev_priv(dev); u16 hash_table[32]; struct netdev_hw_addr *ha; + const u16 *eaddrs; int i; - u16 *eaddrs; memset(hash_table, 0, sizeof(hash_table)); __set_bit_le(255, hash_table); /* Broadcast entry */ @@ -1019,7 +1019,7 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev) setup_frm = &tp->setup_frame[13*6]; /* Fill the final entry with our physical address. */ - eaddrs = (u16 *)dev->dev_addr; + eaddrs = (const u16 *)dev->dev_addr; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; @@ -1029,7 +1029,7 @@ static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev) { struct tulip_private *tp = netdev_priv(dev); struct netdev_hw_addr *ha; - u16 *eaddrs; + const u16 *eaddrs; /* We have <= 14 addresses so we can use the wonderful 16 address perfect filtering of the Tulip. */ @@ -1044,7 +1044,7 @@ static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev) setup_frm = &tp->setup_frame[15*6]; /* Fill the final entry with our physical address. */ - eaddrs = (u16 *)dev->dev_addr; + eaddrs = (const u16 *)dev->dev_addr; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; @@ -1305,6 +1305,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chip_idx = ent->driver_data; const char *chip_name = tulip_tbl[chip_idx].chip_name; unsigned int eeprom_missing = 0; + u8 addr[ETH_ALEN] __aligned(2); unsigned int force_csr0 = 0; board_idx++; @@ -1506,13 +1507,15 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) do { value = ioread32(ioaddr + CSR9); } while (value < 0 && --boguscnt > 0); - put_unaligned_le16(value, ((__le16 *)dev->dev_addr) + i); + put_unaligned_le16(value, ((__le16 *)addr) + i); sum += value & 0xffff; } + eth_hw_addr_set(dev, addr); } else if (chip_idx == COMET) { /* No need to read the EEPROM. */ - put_unaligned_le32(ioread32(ioaddr + 0xA4), dev->dev_addr); - put_unaligned_le16(ioread32(ioaddr + 0xA8), dev->dev_addr + 4); + put_unaligned_le32(ioread32(ioaddr + 0xA4), addr); + put_unaligned_le16(ioread32(ioaddr + 0xA8), addr + 4); + eth_hw_addr_set(dev, addr); for (i = 0; i < 6; i ++) sum += dev->dev_addr[i]; } else { @@ -1575,20 +1578,23 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #endif for (i = 0; i < 6; i ++) { - dev->dev_addr[i] = ee_data[i + sa_offset]; + addr[i] = ee_data[i + sa_offset]; sum += ee_data[i + sa_offset]; } + eth_hw_addr_set(dev, addr); } /* Lite-On boards have the address byte-swapped. */ if ((dev->dev_addr[0] == 0xA0 || dev->dev_addr[0] == 0xC0 || dev->dev_addr[0] == 0x02) && - dev->dev_addr[1] == 0x00) + dev->dev_addr[1] == 0x00) { for (i = 0; i < 6; i+=2) { - char tmp = dev->dev_addr[i]; - dev->dev_addr[i] = dev->dev_addr[i+1]; - dev->dev_addr[i+1] = tmp; + addr[i] = dev->dev_addr[i+1]; + addr[i+1] = dev->dev_addr[i]; } + eth_hw_addr_set(dev, addr); + } + /* On the Zynx 315 Etherarray and other multiport boards only the first Tulip has an EEPROM. On Sparc systems the mac address is held in the OBP property @@ -1604,8 +1610,9 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #endif eeprom_missing = 1; for (i = 0; i < 5; i++) - dev->dev_addr[i] = last_phys_addr[i]; - dev->dev_addr[i] = last_phys_addr[i] + 1; + addr[i] = last_phys_addr[i]; + addr[i] = last_phys_addr[i] + 1; + eth_hw_addr_set(dev, addr); #if defined(CONFIG_SPARC) addr = of_get_property(dp, "local-mac-address", &len); if (addr && len == ETH_ALEN) diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index d67ef7d02d6b..77d9058431e3 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -272,6 +272,7 @@ static int uli526x_init_one(struct pci_dev *pdev, struct uli526x_board_info *db; /* board information structure */ struct net_device *dev; void __iomem *ioaddr; + u8 addr[ETH_ALEN]; int i, err; ULI526X_DBUG(0, "uli526x_init_one()", 0); @@ -379,7 +380,7 @@ static int uli526x_init_one(struct pci_dev *pdev, uw32(DCR13, 0x1b0); //Select ID Table access port //Read MAC address from CR14 for (i = 0; i < 6; i++) - dev->dev_addr[i] = ur32(DCR14); + addr[i] = ur32(DCR14); //Read end uw32(DCR13, 0); //Clear CR13 uw32(DCR0, 0); //Clear CR0 @@ -388,8 +389,10 @@ static int uli526x_init_one(struct pci_dev *pdev, else /*Exist SROM*/ { for (i = 0; i < 6; i++) - dev->dev_addr[i] = db->srom[20 + i]; + addr[i] = db->srom[20 + i]; } + eth_hw_addr_set(dev, addr); + err = register_netdev (dev); if (err) goto err_out_unmap; @@ -1343,7 +1346,7 @@ static void send_filter_frame(struct net_device *dev, int mc_cnt) void __iomem *ioaddr = db->ioaddr; struct netdev_hw_addr *ha; struct tx_desc *txptr; - u16 * addrptr; + const u16 * addrptr; u32 * suptr; int i; @@ -1353,7 +1356,7 @@ static void send_filter_frame(struct net_device *dev, int mc_cnt) suptr = (u32 *) txptr->tx_buf_ptr; /* Node address */ - addrptr = (u16 *) dev->dev_addr; + addrptr = (const u16 *) dev->dev_addr; *suptr++ = addrptr[0] << FLT_SHIFT; *suptr++ = addrptr[1] << FLT_SHIFT; *suptr++ = addrptr[2] << FLT_SHIFT; diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c index a8de79355578..8759f9f76b62 100644 --- a/drivers/net/ethernet/dec/tulip/xircom_cb.c +++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c @@ -1015,12 +1015,14 @@ static void read_mac_address(struct xircom_private *card) xw32(CSR10, i + 3); data_count = xr32(CSR9); if ((tuple == 0x22) && (data_id == 0x04) && (data_count == 0x06)) { + u8 addr[ETH_ALEN]; int j; for (j = 0; j < 6; j++) { xw32(CSR10, i + j + 4); - card->dev->dev_addr[j] = xr32(CSR9) & 0xff; + addr[j] = xr32(CSR9) & 0xff; } + eth_hw_addr_set(card->dev, addr); break; } else if (link == 0) { break; From a7639279c93c1e07a703c0f86f0831d4ea2314f0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:59:12 -0700 Subject: [PATCH 056/440] ethernet: sun: remove direct netdev->dev_addr writes Consify temporary variables pointing to netdev->dev_addr. A few places need local storage but pretty simple conversion over all. Note that macaddr[] is an array of ints, so we need to keep the loops. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 7 ++--- drivers/net/ethernet/sun/ldmvsw.c | 7 ++--- drivers/net/ethernet/sun/niu.c | 42 +++++++++++++++++------------- drivers/net/ethernet/sun/sungem.c | 11 +++++--- drivers/net/ethernet/sun/sunhme.c | 15 ++++++++--- 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 287ae4c538aa..d2d4f47c7e28 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3027,7 +3027,7 @@ static void cas_mac_reset(struct cas *cp) /* Must be invoked under cp->lock. */ static void cas_init_mac(struct cas *cp) { - unsigned char *e = &cp->dev->dev_addr[0]; + const unsigned char *e = &cp->dev->dev_addr[0]; int i; cas_mac_reset(cp); @@ -3379,6 +3379,7 @@ static void cas_check_pci_invariants(struct cas *cp) static int cas_check_invariants(struct cas *cp) { struct pci_dev *pdev = cp->pdev; + u8 addr[ETH_ALEN]; u32 cfg; int i; @@ -3407,8 +3408,8 @@ static int cas_check_invariants(struct cas *cp) /* finish phy determination. MDIO1 takes precedence over MDIO0 if * they're both connected. */ - cp->phy_type = cas_get_vpd_info(cp, cp->dev->dev_addr, - PCI_SLOT(pdev->devfn)); + cp->phy_type = cas_get_vpd_info(cp, addr, PCI_SLOT(pdev->devfn)); + eth_hw_addr_set(cp->dev, addr); if (cp->phy_type & CAS_PHY_SERDES) { cp->cas_flags |= CAS_FLAG_1000MB_CAP; return 0; /* no more checking needed */ diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 50bd4e3b0af9..074c5407c86b 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -230,7 +230,6 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], { struct net_device *dev; struct vnet_port *port; - int i; dev = alloc_etherdev_mqs(sizeof(*port), VNET_MAX_TXQS, 1); if (!dev) @@ -238,10 +237,8 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], dev->needed_headroom = VNET_PACKET_SKIP + 8; dev->needed_tailroom = 8; - for (i = 0; i < ETH_ALEN; i++) { - dev->dev_addr[i] = hwaddr[i]; - dev->perm_addr[i] = dev->dev_addr[i]; - } + eth_hw_addr_set(dev, hwaddr); + ether_addr_copy(dev->perm_addr, dev->dev_addr) sprintf(dev->name, "vif%d.%d", (int)handle, (int)port_id); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 1a73a9401347..ba8ad76313a9 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -2603,7 +2603,7 @@ static int niu_init_link(struct niu *np) return 0; } -static void niu_set_primary_mac(struct niu *np, unsigned char *addr) +static void niu_set_primary_mac(struct niu *np, const unsigned char *addr) { u16 reg0 = addr[4] << 8 | addr[5]; u16 reg1 = addr[2] << 8 | addr[3]; @@ -8312,6 +8312,7 @@ static void niu_pci_vpd_validate(struct niu *np) { struct net_device *dev = np->dev; struct niu_vpd *vpd = &np->vpd; + u8 addr[ETH_ALEN]; u8 val8; if (!is_valid_ether_addr(&vpd->local_mac[0])) { @@ -8344,17 +8345,20 @@ static void niu_pci_vpd_validate(struct niu *np) return; } - eth_hw_addr_set(dev, vpd->local_mac); + ether_addr_copy(addr, vpd->local_mac); - val8 = dev->dev_addr[5]; - dev->dev_addr[5] += np->port; - if (dev->dev_addr[5] < val8) - dev->dev_addr[4]++; + val8 = addr[5]; + addr[5] += np->port; + if (addr[5] < val8) + addr[4]++; + + eth_hw_addr_set(dev, addr); } static int niu_pci_probe_sprom(struct niu *np) { struct net_device *dev = np->dev; + u8 addr[ETH_ALEN]; int len, i; u64 val, sum; u8 val8; @@ -8446,27 +8450,29 @@ static int niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_MAC_ADDR0); netif_printk(np, probe, KERN_DEBUG, np->dev, "SPROM: MAC_ADDR0[%08llx]\n", (unsigned long long)val); - dev->dev_addr[0] = (val >> 0) & 0xff; - dev->dev_addr[1] = (val >> 8) & 0xff; - dev->dev_addr[2] = (val >> 16) & 0xff; - dev->dev_addr[3] = (val >> 24) & 0xff; + addr[0] = (val >> 0) & 0xff; + addr[1] = (val >> 8) & 0xff; + addr[2] = (val >> 16) & 0xff; + addr[3] = (val >> 24) & 0xff; val = nr64(ESPC_MAC_ADDR1); netif_printk(np, probe, KERN_DEBUG, np->dev, "SPROM: MAC_ADDR1[%08llx]\n", (unsigned long long)val); - dev->dev_addr[4] = (val >> 0) & 0xff; - dev->dev_addr[5] = (val >> 8) & 0xff; + addr[4] = (val >> 0) & 0xff; + addr[5] = (val >> 8) & 0xff; - if (!is_valid_ether_addr(&dev->dev_addr[0])) { + if (!is_valid_ether_addr(addr)) { dev_err(np->device, "SPROM MAC address invalid [ %pM ]\n", - dev->dev_addr); + addr); return -EINVAL; } - val8 = dev->dev_addr[5]; - dev->dev_addr[5] += np->port; - if (dev->dev_addr[5] < val8) - dev->dev_addr[4]++; + val8 = addr[5]; + addr[5] += np->port; + if (addr[5] < val8) + addr[4]++; + + eth_hw_addr_set(dev, addr); val = nr64(ESPC_MOD_STR_LEN); netif_printk(np, probe, KERN_DEBUG, np->dev, diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 5f786d6fa150..036856102c50 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1810,7 +1810,7 @@ static u32 gem_setup_multicast(struct gem *gp) static void gem_init_mac(struct gem *gp) { - unsigned char *e = &gp->dev->dev_addr[0]; + const unsigned char *e = &gp->dev->dev_addr[0]; writel(0x1bf0, gp->regs + MAC_SNDPAUSE); @@ -2087,7 +2087,7 @@ static void gem_stop_phy(struct gem *gp, int wol) writel(mifcfg, gp->regs + MIF_CFG); if (wol && gp->has_wol) { - unsigned char *e = &gp->dev->dev_addr[0]; + const unsigned char *e = &gp->dev->dev_addr[0]; u32 csr; /* Setup wake-on-lan for MAGIC packet */ @@ -2431,8 +2431,8 @@ static struct net_device_stats *gem_get_stats(struct net_device *dev) static int gem_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *macaddr = (struct sockaddr *) addr; + const unsigned char *e = &dev->dev_addr[0]; struct gem *gp = netdev_priv(dev); - unsigned char *e = &dev->dev_addr[0]; if (!is_valid_ether_addr(macaddr->sa_data)) return -EADDRNOTAVAIL; @@ -2799,7 +2799,10 @@ static int gem_get_device_address(struct gem *gp) } eth_hw_addr_set(dev, addr); #else - get_gem_mac_nonobp(gp->pdev, gp->dev->dev_addr); + u8 addr[ETH_ALEN]; + + get_gem_mac_nonobp(gp->pdev, addr); + eth_hw_addr_set(gp->dev, addr); #endif return 0; } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index fe5482b1872f..ad9029ae6848 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1395,13 +1395,13 @@ force_link: /* hp->happy_lock must be held */ static int happy_meal_init(struct happy_meal *hp) { + const unsigned char *e = &hp->dev->dev_addr[0]; void __iomem *gregs = hp->gregs; void __iomem *etxregs = hp->etxregs; void __iomem *erxregs = hp->erxregs; void __iomem *bregs = hp->bigmacregs; void __iomem *tregs = hp->tcvregs; u32 regtmp, rxcfg; - unsigned char *e = &hp->dev->dev_addr[0]; /* If auto-negotiation timer is running, kill it. */ del_timer(&hp->happy_timer); @@ -2661,6 +2661,7 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe) struct happy_meal *hp; struct net_device *dev; int i, qfe_slot = -1; + u8 addr[ETH_ALEN]; int err = -ENODEV; sbus_dp = op->dev.parent->of_node; @@ -2698,7 +2699,8 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe) } if (i < 6) { /* a mac address was given */ for (i = 0; i < 6; i++) - dev->dev_addr[i] = macaddr[i]; + addr[i] = macaddr[i]; + eth_hw_addr_set(dev, addr); macaddr[5]++; } else { const unsigned char *addr; @@ -2969,6 +2971,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, unsigned long hpreg_res; int i, qfe_slot = -1; char prom_name[64]; + u8 addr[ETH_ALEN]; int err; /* Now make sure pci_dev cookie is there. */ @@ -3044,7 +3047,8 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, } if (i < 6) { /* a mac address was given */ for (i = 0; i < 6; i++) - dev->dev_addr[i] = macaddr[i]; + addr[i] = macaddr[i]; + eth_hw_addr_set(dev, addr); macaddr[5]++; } else { #ifdef CONFIG_SPARC @@ -3060,7 +3064,10 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, eth_hw_addr_set(dev, idprom->id_ethaddr); } #else - get_hme_mac_nonsparc(pdev, &dev->dev_addr[0]); + u8 addr[ETH_ALEN]; + + get_hme_mac_nonsparc(pdev, addr); + eth_hw_addr_set(dev, addr); #endif } From 8ce218b6e58ad9d2149402ea48ae8dbb0c57b1b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:59:13 -0700 Subject: [PATCH 057/440] ethernet: 8390: remove direct netdev->dev_addr writes 8390 contains a lot of loops assigning netdev->dev_addr byte by byte. Convert what's possible directly to eth_hw_addr_set(), use local buf in other places. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/apne.c | 3 +-- drivers/net/ethernet/8390/ax88796.c | 6 ++++-- drivers/net/ethernet/8390/axnet_cs.c | 7 +++++-- drivers/net/ethernet/8390/mcf8390.c | 3 +-- drivers/net/ethernet/8390/ne.c | 4 +--- drivers/net/ethernet/8390/pcnet_cs.c | 22 ++++++++++++++++------ drivers/net/ethernet/8390/stnic.c | 5 ++--- drivers/net/ethernet/8390/zorro8390.c | 3 +-- 8 files changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index da1ae37a9d73..991ad953aa79 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -320,8 +320,7 @@ static int __init apne_probe1(struct net_device *dev, int ioaddr) i = request_irq(dev->irq, apne_interrupt, IRQF_SHARED, DRV_NAME, dev); if (i) return i; - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = SA_prom[i]; + eth_hw_addr_set(dev, SA_prom); pr_cont(" %pM\n", dev->dev_addr); diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index b3e612b18abd..1f8acbba5b6b 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -748,11 +748,13 @@ static int ax_init_dev(struct net_device *dev) /* load the mac-address from the device */ if (ax->plat->flags & AXFLG_MAC_FROMDEV) { + u8 addr[ETH_ALEN]; + ei_outb(E8390_NODMA + E8390_PAGE1 + E8390_STOP, ei_local->mem + E8390_CMD); /* 0x61 */ for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = - ei_inb(ioaddr + EN1_PHYS_SHIFT(i)); + addr[i] = ei_inb(ioaddr + EN1_PHYS_SHIFT(i)); + eth_hw_addr_set(dev, addr); } if ((ax->plat->flags & AXFLG_MAC_FROMPLATFORM) && diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 3c370e686ec3..3aef959fc25b 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -187,6 +187,7 @@ static int get_prom(struct pcmcia_device *link) { struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; + u8 addr[ETH_ALEN]; int i, j; /* This is based on drivers/net/ethernet/8390/ne.c */ @@ -220,9 +221,11 @@ static int get_prom(struct pcmcia_device *link) for (i = 0; i < 6; i += 2) { j = inw(ioaddr + AXNET_DATAPORT); - dev->dev_addr[i] = j & 0xff; - dev->dev_addr[i+1] = j >> 8; + addr[i] = j & 0xff; + addr[i+1] = j >> 8; } + eth_hw_addr_set(dev, addr); + return 1; } /* get_prom */ diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index 4ad8031ab669..e320cccba61a 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -374,8 +374,7 @@ static int mcf8390_init(struct net_device *dev) if (ret) return ret; - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = SA_prom[i]; + eth_hw_addr_set(dev, SA_prom); netdev_dbg(dev, "Found ethernet address: %pM\n", dev->dev_addr); diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 9afc712f5948..0a9118b8be0c 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -500,9 +500,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr) dev->base_addr = ioaddr; - for (i = 0; i < ETH_ALEN; i++) { - dev->dev_addr[i] = SA_prom[i]; - } + eth_hw_addr_set(dev, SA_prom); pr_cont("%pM\n", dev->dev_addr); diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 96ad72abd373..0f07fe03da98 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -278,6 +278,7 @@ static struct hw_info *get_hwinfo(struct pcmcia_device *link) { struct net_device *dev = link->priv; u_char __iomem *base, *virt; + u8 addr[ETH_ALEN]; int i, j; /* Allocate a small memory window */ @@ -302,7 +303,8 @@ static struct hw_info *get_hwinfo(struct pcmcia_device *link) (readb(base+2) == hw_info[i].a1) && (readb(base+4) == hw_info[i].a2)) { for (j = 0; j < 6; j++) - dev->dev_addr[j] = readb(base + (j<<1)); + addr[j] = readb(base + (j<<1)); + eth_hw_addr_set(dev, addr); break; } } @@ -324,6 +326,7 @@ static struct hw_info *get_prom(struct pcmcia_device *link) { struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; + u8 addr[ETH_ALEN]; u_char prom[32]; int i, j; @@ -362,7 +365,8 @@ static struct hw_info *get_prom(struct pcmcia_device *link) } if ((i < NR_INFO) || ((prom[28] == 0x57) && (prom[30] == 0x57))) { for (j = 0; j < 6; j++) - dev->dev_addr[j] = prom[j<<1]; + addr[j] = prom[j<<1]; + eth_hw_addr_set(dev, addr); return (i < NR_INFO) ? hw_info+i : &default_info; } return NULL; @@ -377,6 +381,7 @@ static struct hw_info *get_prom(struct pcmcia_device *link) static struct hw_info *get_dl10019(struct pcmcia_device *link) { struct net_device *dev = link->priv; + u8 addr[ETH_ALEN]; int i; u_char sum; @@ -385,7 +390,8 @@ static struct hw_info *get_dl10019(struct pcmcia_device *link) if (sum != 0xff) return NULL; for (i = 0; i < 6; i++) - dev->dev_addr[i] = inb_p(dev->base_addr + 0x14 + i); + addr[i] = inb_p(dev->base_addr + 0x14 + i); + eth_hw_addr_set(dev, addr); i = inb(dev->base_addr + 0x1f); return ((i == 0x91)||(i == 0x99)) ? &dl10022_info : &dl10019_info; } @@ -400,6 +406,7 @@ static struct hw_info *get_ax88190(struct pcmcia_device *link) { struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; + u8 addr[ETH_ALEN]; int i, j; /* Not much of a test, but the alternatives are messy */ @@ -413,9 +420,10 @@ static struct hw_info *get_ax88190(struct pcmcia_device *link) for (i = 0; i < 6; i += 2) { j = inw(ioaddr + PCNET_DATAPORT); - dev->dev_addr[i] = j & 0xff; - dev->dev_addr[i+1] = j >> 8; + addr[i] = j & 0xff; + addr[i+1] = j >> 8; } + eth_hw_addr_set(dev, addr); return NULL; } @@ -430,6 +438,7 @@ static struct hw_info *get_ax88190(struct pcmcia_device *link) static struct hw_info *get_hwired(struct pcmcia_device *link) { struct net_device *dev = link->priv; + u8 addr[ETH_ALEN]; int i; for (i = 0; i < 6; i++) @@ -438,7 +447,8 @@ static struct hw_info *get_hwired(struct pcmcia_device *link) return NULL; for (i = 0; i < 6; i++) - dev->dev_addr[i] = hw_addr[i]; + addr[i] = hw_addr[i]; + eth_hw_addr_set(dev, addr); return &default_info; } /* get_hwired */ diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c index fbbd7f22c142..bd89ca8a92df 100644 --- a/drivers/net/ethernet/8390/stnic.c +++ b/drivers/net/ethernet/8390/stnic.c @@ -104,8 +104,8 @@ STNIC_WRITE (int reg, byte val) static int __init stnic_probe(void) { struct net_device *dev; - int i, err; struct ei_device *ei_local; + int err; /* If we are not running on a SolutionEngine, give up now */ if (! MACH_SE) @@ -119,8 +119,7 @@ static int __init stnic_probe(void) #ifdef CONFIG_SH_STANDARD_BIOS sh_bios_get_node_addr (stnic_eadr); #endif - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = stnic_eadr[i]; + eth_hw_addr_set(dev, stnic_eadr); /* Set the base address to point to the NIC, not the "real" base! */ dev->base_addr = 0x1000; diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index 35a500a21521..e8b4fe813a08 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -364,8 +364,7 @@ static int zorro8390_init(struct net_device *dev, unsigned long board, if (i) return i; - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = SA_prom[i]; + eth_hw_addr_set(dev, SA_prom); pr_debug("Found ethernet address: %pM\n", dev->dev_addr); From ea52a0b58e41c3b2b9e97ff13fe0da9c9e430ea8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Oct 2021 10:53:39 -0700 Subject: [PATCH 058/440] net: use dev_addr_set() Use dev_addr_set() instead of writing directly to netdev->dev_addr in various misc and old drivers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/net/hamradio/bpqether.c | 2 +- drivers/net/hamradio/dmascc.c | 3 +-- drivers/net/hamradio/hdlcdrv.c | 2 +- drivers/net/hamradio/scc.c | 2 +- drivers/net/hamradio/yam.c | 2 +- drivers/net/net_failover.c | 3 +-- drivers/net/ntb_netdev.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/wan/lapbether.c | 2 +- net/802/hippi.c | 2 +- net/atm/br2684.c | 2 +- net/netrom/nr_dev.c | 2 +- net/rose/rose_dev.c | 2 +- 16 files changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 77dc79a7f574..0c52612cb8e9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4414,7 +4414,7 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) } /* success */ - memcpy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len); + dev_addr_set(bond_dev, ss->__data); return 0; unwind: diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 775dcf4ebde5..f6428ff780ab 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -791,7 +791,7 @@ static int baycom_set_mac_address(struct net_device *dev, void *addr) struct sockaddr *sa = (struct sockaddr *)addr; /* addr is an AX.25 shifted ASCII mac address */ - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d967b0748773..1ac816164a9b 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -302,7 +302,7 @@ static int bpq_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = (struct sockaddr *)addr; - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index f4c3efc3e074..a4ba1b79d2d0 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -956,8 +956,7 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev) static int scc_set_mac_address(struct net_device *dev, void *sa) { - memcpy(dev->dev_addr, ((struct sockaddr *) sa)->sa_data, - dev->addr_len); + dev_addr_set(dev, ((struct sockaddr *)sa)->sa_data); return 0; } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 5805cfc83854..ab5327acec22 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -415,7 +415,7 @@ static int hdlcdrv_set_mac_address(struct net_device *dev, void *addr) struct sockaddr *sa = (struct sockaddr *)addr; /* addr is an AX.25 shifted ASCII mac address */ - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index e0bb131a33d7..9b745d09d327 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1951,7 +1951,7 @@ static int scc_net_siocdevprivate(struct net_device *dev, static int scc_net_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = (struct sockaddr *) addr; - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 6ddacbdb224b..e557ccf98bcf 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1063,7 +1063,7 @@ static int yam_set_mac_address(struct net_device *dev, void *addr) struct sockaddr *sa = (struct sockaddr *) addr; /* addr is an AX.25 shifted ASCII mac address */ - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 2a4892402ed8..86ec5aae4289 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -748,8 +748,7 @@ struct failover *net_failover_create(struct net_device *standby_dev) failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; failover_dev->features |= failover_dev->hw_features; - memcpy(failover_dev->dev_addr, standby_dev->dev_addr, - failover_dev->addr_len); + dev_addr_set(failover_dev, standby_dev->dev_addr); failover_dev->min_mtu = standby_dev->min_mtu; failover_dev->max_mtu = standby_dev->max_mtu; diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index a5bab614ff84..98ca6b18415e 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -428,7 +428,7 @@ static int ntb_netdev_probe(struct device *client_dev) ndev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS); eth_random_addr(ndev->perm_addr); - memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len); + dev_addr_set(ndev, ndev->perm_addr); ndev->netdev_ops = &ntb_netdev_ops; ndev->ethtool_ops = &ntb_ethtool_ops; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index dd7917cab2b1..8b2adc56b92a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1790,7 +1790,7 @@ static int team_set_mac_address(struct net_device *dev, void *p) if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + dev_addr_set(dev, addr->sa_data); mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 142f70670f5c..7a205ddf0060 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2824,7 +2824,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p) struct sockaddr *addr = p; struct vmxnet3_adapter *adapter = netdev_priv(netdev); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + dev_addr_set(netdev, addr->sa_data); vmxnet3_write_mac_addr(adapter, addr->sa_data); return 0; @@ -3638,7 +3638,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, #endif vmxnet3_read_mac_addr(adapter, mac); - memcpy(netdev->dev_addr, mac, netdev->addr_len); + dev_addr_set(netdev, mac); netdev->netdev_ops = &vmxnet3_netdev_ops; vmxnet3_set_ethtool_ops(netdev); diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 89d31adc3809..282192b82404 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -301,7 +301,7 @@ static int lapbeth_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/net/802/hippi.c b/net/802/hippi.c index f80b33a8f7e0..887e73d520e4 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -121,7 +121,7 @@ int hippi_mac_addr(struct net_device *dev, void *p) struct sockaddr *addr = p; if (netif_running(dev)) return -EBUSY; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + dev_addr_set(dev, addr->sa_data); return 0; } EXPORT_SYMBOL(hippi_mac_addr); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index dd2a8dabed84..11854fde52db 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -578,7 +578,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) - memcpy(net_dev->dev_addr, esi, net_dev->addr_len); + dev_addr_set(net_dev, esi); else net_dev->dev_addr[2] = 1; } diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 29e418c8c6c3..d1ca413d3317 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -111,7 +111,7 @@ static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) ax25_listen_release((ax25_address *)dev->dev_addr, NULL); } - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 051804fbee17..2a35e188b389 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -69,7 +69,7 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) rose_del_loopback_node((rose_address *)dev->dev_addr); } - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } From 5ee61ad7d59300d568f7c3da45171f2a31d0c4d3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Oct 2021 15:27:35 +0300 Subject: [PATCH 059/440] dt-bindings: net: dsa: fix typo in dsa-tag-protocol description There is a trivial typo when spelling "protocol", fix it. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 16aa192c118e..51f243244f7b 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -73,7 +73,7 @@ patternProperties: dsa-tag-protocol: description: Instead of the default, the switch will use this tag protocol if - possible. Useful when a device supports multiple protcols and + possible. Useful when a device supports multiple protocols and the default is incompatible with the Ethernet device. enum: - dsa From 7932d53162dc6550fc56b013da32c0975784647c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Oct 2021 15:27:36 +0300 Subject: [PATCH 060/440] dt-bindings: net: dsa: document felix family in dsa-tag-protocol The Vitesse/Microsemi/Microchip family of switches supported by the Felix DSA driver is capable of selecting between the native tagging protocol and ocelot-8021q. This is necessary to enable flow control on the CPU port. Certain systems where these switches are integrated use the switch as a port multiplexer, so the termination throughput is paramount. Changing the tagging protocol at runtime is possible for these systems, but since it is known beforehand that one tagging protocol will provide strictly better performance than the other, just allow them to specify the preference in the device tree. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 51f243244f7b..224cfa45de9a 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -78,6 +78,9 @@ patternProperties: enum: - dsa - edsa + - ocelot + - ocelot-8021q + - seville phy-handle: true From c1634b118e846d4d53a3ebe63c0320d23fcb6cc8 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:15 -0700 Subject: [PATCH 061/440] ionic: add filterlist to debugfs Dump the filter list to debugfs - includes the device-assigned filter id and the sync'd-to-hardware status. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_debugfs.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index 86b79430c2ad..c58217027564 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -226,6 +226,50 @@ static int netdev_show(struct seq_file *seq, void *v) } DEFINE_SHOW_ATTRIBUTE(netdev); +static int lif_filters_show(struct seq_file *seq, void *v) +{ + struct ionic_lif *lif = seq->private; + struct ionic_rx_filter *f; + struct hlist_head *head; + struct hlist_node *tmp; + unsigned int i; + + seq_puts(seq, "id flow state type filter\n"); + spin_lock_bh(&lif->rx_filters.lock); + for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { + head = &lif->rx_filters.by_id[i]; + hlist_for_each_entry_safe(f, tmp, head, by_id) { + switch (le16_to_cpu(f->cmd.match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + seq_printf(seq, "0x%04x 0x%08x 0x%02x vlan 0x%04x\n", + f->filter_id, f->flow_id, f->state, + le16_to_cpu(f->cmd.vlan.vlan)); + break; + case IONIC_RX_FILTER_MATCH_MAC: + seq_printf(seq, "0x%04x 0x%08x 0x%02x mac %pM\n", + f->filter_id, f->flow_id, f->state, + f->cmd.mac.addr); + break; + case IONIC_RX_FILTER_MATCH_MAC_VLAN: + seq_printf(seq, "0x%04x 0x%08x 0x%02x macvl 0x%04x %pM\n", + f->filter_id, f->flow_id, f->state, + le16_to_cpu(f->cmd.vlan.vlan), + f->cmd.mac.addr); + break; + case IONIC_RX_FILTER_STEER_PKTCLASS: + seq_printf(seq, "0x%04x 0x%08x 0x%02x rxstr 0x%llx\n", + f->filter_id, f->flow_id, f->state, + le64_to_cpu(f->cmd.pkt_class)); + break; + } + } + } + spin_unlock_bh(&lif->rx_filters.lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(lif_filters); + void ionic_debugfs_add_lif(struct ionic_lif *lif) { struct dentry *lif_dentry; @@ -237,6 +281,8 @@ void ionic_debugfs_add_lif(struct ionic_lif *lif) debugfs_create_file("netdev", 0400, lif->dentry, lif->netdev, &netdev_fops); + debugfs_create_file("filters", 0400, lif->dentry, + lif, &lif_filters_fops); } void ionic_debugfs_del_lif(struct ionic_lif *lif) From 1d4ddc4a5370793a24d37b9bd1e2cb52f42e6d65 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:16 -0700 Subject: [PATCH 062/440] ionic: move lif mac address functions The routines that add and delete mac addresses from the firmware really should be in the file with the rest of the filter management. This simply moves the functions with no logic changes. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_lif.c | 131 ------------------ .../ethernet/pensando/ionic/ionic_rx_filter.c | 131 ++++++++++++++++++ 2 files changed, 131 insertions(+), 131 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 775ce83a1b28..968403a01477 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1242,137 +1242,6 @@ void ionic_get_stats64(struct net_device *netdev, ns->tx_errors = ns->tx_aborted_errors; } -int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_add = { - .opcode = IONIC_CMD_RX_FILTER_ADD, - .lif_index = cpu_to_le16(lif->index), - .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), - }, - }; - int nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); - bool mc = is_multicast_ether_addr(addr); - struct ionic_rx_filter *f; - int err = 0; - - memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); - - spin_lock_bh(&lif->rx_filters.lock); - f = ionic_rx_filter_by_addr(lif, addr); - if (f) { - /* don't bother if we already have it and it is sync'd */ - if (f->state == IONIC_FILTER_STATE_SYNCED) { - spin_unlock_bh(&lif->rx_filters.lock); - return 0; - } - - /* mark preemptively as sync'd to block any parallel attempts */ - f->state = IONIC_FILTER_STATE_SYNCED; - } else { - /* save as SYNCED to catch any DEL requests while processing */ - err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, - IONIC_FILTER_STATE_SYNCED); - } - spin_unlock_bh(&lif->rx_filters.lock); - if (err) - return err; - - netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr); - - /* Don't bother with the write to FW if we know there's no room, - * we can try again on the next sync attempt. - */ - if ((lif->nucast + lif->nmcast) >= nfilters) - err = -ENOSPC; - else - err = ionic_adminq_post_wait(lif, &ctx); - - spin_lock_bh(&lif->rx_filters.lock); - if (err && err != -EEXIST) { - /* set the state back to NEW so we can try again later */ - f = ionic_rx_filter_by_addr(lif, addr); - if (f && f->state == IONIC_FILTER_STATE_SYNCED) { - f->state = IONIC_FILTER_STATE_NEW; - set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); - } - - spin_unlock_bh(&lif->rx_filters.lock); - - if (err == -ENOSPC) - return 0; - else - return err; - } - - if (mc) - lif->nmcast++; - else - lif->nucast++; - - f = ionic_rx_filter_by_addr(lif, addr); - if (f && f->state == IONIC_FILTER_STATE_OLD) { - /* Someone requested a delete while we were adding - * so update the filter info with the results from the add - * and the data will be there for the delete on the next - * sync cycle. - */ - err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, - IONIC_FILTER_STATE_OLD); - } else { - err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, - IONIC_FILTER_STATE_SYNCED); - } - - spin_unlock_bh(&lif->rx_filters.lock); - - return err; -} - -int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_del = { - .opcode = IONIC_CMD_RX_FILTER_DEL, - .lif_index = cpu_to_le16(lif->index), - }, - }; - struct ionic_rx_filter *f; - int state; - int err; - - spin_lock_bh(&lif->rx_filters.lock); - f = ionic_rx_filter_by_addr(lif, addr); - if (!f) { - spin_unlock_bh(&lif->rx_filters.lock); - return -ENOENT; - } - - netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", - addr, f->filter_id); - - state = f->state; - ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); - ionic_rx_filter_free(lif, f); - - if (is_multicast_ether_addr(addr) && lif->nmcast) - lif->nmcast--; - else if (!is_multicast_ether_addr(addr) && lif->nucast) - lif->nucast--; - - spin_unlock_bh(&lif->rx_filters.lock); - - if (state != IONIC_FILTER_STATE_NEW) { - err = ionic_adminq_post_wait(lif, &ctx); - if (err && err != -EEXIST) - return err; - } - - return 0; -} - static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { return ionic_lif_list_addr(netdev_priv(netdev), addr, ADD_ADDR); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 69728f9013cb..19115d966693 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -286,6 +286,137 @@ int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode) return 0; } +int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_add = { + .opcode = IONIC_CMD_RX_FILTER_ADD, + .lif_index = cpu_to_le16(lif->index), + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), + }, + }; + int nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); + bool mc = is_multicast_ether_addr(addr); + struct ionic_rx_filter *f; + int err = 0; + + memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); + + spin_lock_bh(&lif->rx_filters.lock); + f = ionic_rx_filter_by_addr(lif, addr); + if (f) { + /* don't bother if we already have it and it is sync'd */ + if (f->state == IONIC_FILTER_STATE_SYNCED) { + spin_unlock_bh(&lif->rx_filters.lock); + return 0; + } + + /* mark preemptively as sync'd to block any parallel attempts */ + f->state = IONIC_FILTER_STATE_SYNCED; + } else { + /* save as SYNCED to catch any DEL requests while processing */ + err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, + IONIC_FILTER_STATE_SYNCED); + } + spin_unlock_bh(&lif->rx_filters.lock); + if (err) + return err; + + netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr); + + /* Don't bother with the write to FW if we know there's no room, + * we can try again on the next sync attempt. + */ + if ((lif->nucast + lif->nmcast) >= nfilters) + err = -ENOSPC; + else + err = ionic_adminq_post_wait(lif, &ctx); + + spin_lock_bh(&lif->rx_filters.lock); + if (err && err != -EEXIST) { + /* set the state back to NEW so we can try again later */ + f = ionic_rx_filter_by_addr(lif, addr); + if (f && f->state == IONIC_FILTER_STATE_SYNCED) { + f->state = IONIC_FILTER_STATE_NEW; + set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); + } + + spin_unlock_bh(&lif->rx_filters.lock); + + if (err == -ENOSPC) + return 0; + else + return err; + } + + if (mc) + lif->nmcast++; + else + lif->nucast++; + + f = ionic_rx_filter_by_addr(lif, addr); + if (f && f->state == IONIC_FILTER_STATE_OLD) { + /* Someone requested a delete while we were adding + * so update the filter info with the results from the add + * and the data will be there for the delete on the next + * sync cycle. + */ + err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, + IONIC_FILTER_STATE_OLD); + } else { + err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, + IONIC_FILTER_STATE_SYNCED); + } + + spin_unlock_bh(&lif->rx_filters.lock); + + return err; +} + +int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_del = { + .opcode = IONIC_CMD_RX_FILTER_DEL, + .lif_index = cpu_to_le16(lif->index), + }, + }; + struct ionic_rx_filter *f; + int state; + int err; + + spin_lock_bh(&lif->rx_filters.lock); + f = ionic_rx_filter_by_addr(lif, addr); + if (!f) { + spin_unlock_bh(&lif->rx_filters.lock); + return -ENOENT; + } + + netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", + addr, f->filter_id); + + state = f->state; + ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); + ionic_rx_filter_free(lif, f); + + if (is_multicast_ether_addr(addr) && lif->nmcast) + lif->nmcast--; + else if (!is_multicast_ether_addr(addr) && lif->nucast) + lif->nucast--; + + spin_unlock_bh(&lif->rx_filters.lock); + + if (state != IONIC_FILTER_STATE_NEW) { + err = ionic_adminq_post_wait(lif, &ctx); + if (err && err != -EEXIST) + return err; + } + + return 0; +} + struct sync_item { struct list_head list; struct ionic_rx_filter f; From 4ed642cc6538abc8652a7acaf3b94d5356f196a0 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:17 -0700 Subject: [PATCH 063/440] ionic: remove mac overflow flags The overflow flags really aren't useful and we don't need lif struct elements to track them. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 7 ++----- drivers/net/ethernet/pensando/ionic/ionic_lif.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 968403a01477..4a080612142a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1283,14 +1283,11 @@ void ionic_lif_rx_mode(struct ionic_lif *lif) * to see if we can disable NIC PROMISC */ nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); + if ((lif->nucast + lif->nmcast) >= nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; rx_mode |= IONIC_RX_MODE_F_ALLMULTI; - lif->uc_overflow = true; - lif->mc_overflow = true; - } else if (lif->uc_overflow) { - lif->uc_overflow = false; - lif->mc_overflow = false; + } else { if (!(nd_flags & IFF_PROMISC)) rx_mode &= ~IONIC_RX_MODE_F_PROMISC; if (!(nd_flags & IFF_ALLMULTI)) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 41f28154745f..541aa54e4ffd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -189,8 +189,6 @@ struct ionic_lif { u16 rx_mode; u64 hw_features; bool registered; - bool mc_overflow; - bool uc_overflow; u16 lif_type; unsigned int nmcast; unsigned int nucast; From ff542fbe5d552a2e34b82a6c845390999f952b24 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:18 -0700 Subject: [PATCH 064/440] ionic: add generic filter search In preparation for enhancing vlan filter management, add a filter search routine that can figure out for itself which type of filter search is needed. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_rx_filter.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 19115d966693..38109244a722 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -239,6 +239,21 @@ struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif) return NULL; } +static struct ionic_rx_filter *ionic_rx_filter_find(struct ionic_lif *lif, + struct ionic_rx_filter_add_cmd *ac) +{ + switch (le16_to_cpu(ac->match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + return ionic_rx_filter_by_vlan(lif, le16_to_cpu(ac->vlan.vlan)); + case IONIC_RX_FILTER_MATCH_MAC: + return ionic_rx_filter_by_addr(lif, ac->mac.addr); + default: + netdev_err(lif->netdev, "unsupported filter match %d", + le16_to_cpu(ac->match)); + return NULL; + } +} + int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode) { struct ionic_rx_filter *f; @@ -304,7 +319,7 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); spin_lock_bh(&lif->rx_filters.lock); - f = ionic_rx_filter_by_addr(lif, addr); + f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); if (f) { /* don't bother if we already have it and it is sync'd */ if (f->state == IONIC_FILTER_STATE_SYNCED) { @@ -336,7 +351,7 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) spin_lock_bh(&lif->rx_filters.lock); if (err && err != -EEXIST) { /* set the state back to NEW so we can try again later */ - f = ionic_rx_filter_by_addr(lif, addr); + f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); if (f && f->state == IONIC_FILTER_STATE_SYNCED) { f->state = IONIC_FILTER_STATE_NEW; set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); @@ -355,7 +370,7 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) else lif->nucast++; - f = ionic_rx_filter_by_addr(lif, addr); + f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); if (f && f->state == IONIC_FILTER_STATE_OLD) { /* Someone requested a delete while we were adding * so update the filter info with the results from the add From eba688b15d34cbe721b30919d4f3e373defda8f1 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:19 -0700 Subject: [PATCH 065/440] ionic: generic filter add In preparation for adding vlan overflow management, rework the ionic_lif_addr_add() function to something a little more generic that can be used for other filter types. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_rx_filter.c | 68 +++++++++++++------ 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 38109244a722..10837e41f819 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -301,22 +301,19 @@ int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode) return 0; } -int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) +static int ionic_lif_filter_add(struct ionic_lif *lif, + struct ionic_rx_filter_add_cmd *ac) { struct ionic_admin_ctx ctx = { .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_add = { - .opcode = IONIC_CMD_RX_FILTER_ADD, - .lif_index = cpu_to_le16(lif->index), - .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), - }, }; - int nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); - bool mc = is_multicast_ether_addr(addr); struct ionic_rx_filter *f; + int nfilters; int err = 0; - memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); + ctx.cmd.rx_filter_add = *ac; + ctx.cmd.rx_filter_add.opcode = IONIC_CMD_RX_FILTER_ADD, + ctx.cmd.rx_filter_add.lif_index = cpu_to_le16(lif->index), spin_lock_bh(&lif->rx_filters.lock); f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); @@ -338,37 +335,53 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) if (err) return err; - netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr); - /* Don't bother with the write to FW if we know there's no room, * we can try again on the next sync attempt. */ - if ((lif->nucast + lif->nmcast) >= nfilters) - err = -ENOSPC; - else + switch (le16_to_cpu(ctx.cmd.rx_filter_add.match)) { + case IONIC_RX_FILTER_MATCH_MAC: + netdev_dbg(lif->netdev, "%s: rx_filter add ADDR %pM\n", + __func__, ctx.cmd.rx_filter_add.mac.addr); + nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); + if ((lif->nucast + lif->nmcast) >= nfilters) + err = -ENOSPC; + break; + } + + if (err != -ENOSPC) err = ionic_adminq_post_wait(lif, &ctx); spin_lock_bh(&lif->rx_filters.lock); + if (err && err != -EEXIST) { /* set the state back to NEW so we can try again later */ f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); if (f && f->state == IONIC_FILTER_STATE_SYNCED) { f->state = IONIC_FILTER_STATE_NEW; - set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); + + /* If -ENOSPC we won't waste time trying to sync again + * until there is a delete that might make room + */ + if (err != -ENOSPC) + set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); } spin_unlock_bh(&lif->rx_filters.lock); if (err == -ENOSPC) return 0; - else - return err; + + return err; } - if (mc) - lif->nmcast++; - else - lif->nucast++; + switch (le16_to_cpu(ctx.cmd.rx_filter_add.match)) { + case IONIC_RX_FILTER_MATCH_MAC: + if (is_multicast_ether_addr(ctx.cmd.rx_filter_add.mac.addr)) + lif->nmcast++; + else + lif->nucast++; + break; + } f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); if (f && f->state == IONIC_FILTER_STATE_OLD) { @@ -389,6 +402,17 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) return err; } +int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_rx_filter_add_cmd ac = { + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), + }; + + memcpy(&ac.mac.addr, addr, ETH_ALEN); + + return ionic_lif_filter_add(lif, &ac); +} + int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) { struct ionic_admin_ctx ctx = { @@ -493,7 +517,7 @@ loop_out: } list_for_each_entry_safe(sync_item, spos, &sync_add_list, list) { - (void)ionic_lif_addr_add(lif, sync_item->f.cmd.mac.addr); + (void)ionic_lif_filter_add(lif, &sync_item->f.cmd); list_del(&sync_item->list); devm_kfree(dev, sync_item); From c2b63d3449d327ef46e1d074cd97fe22193d8aa1 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:20 -0700 Subject: [PATCH 066/440] ionic: generic filter delete Similar to the filter add, make a generic filter delete. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_rx_filter.c | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 10837e41f819..40a12b9df982 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -413,7 +413,8 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) return ionic_lif_filter_add(lif, &ac); } -int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) +static int ionic_lif_filter_del(struct ionic_lif *lif, + struct ionic_rx_filter_add_cmd *ac) { struct ionic_admin_ctx ctx = { .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), @@ -427,24 +428,27 @@ int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) int err; spin_lock_bh(&lif->rx_filters.lock); - f = ionic_rx_filter_by_addr(lif, addr); + f = ionic_rx_filter_find(lif, ac); if (!f) { spin_unlock_bh(&lif->rx_filters.lock); return -ENOENT; } - netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", - addr, f->filter_id); + switch (le16_to_cpu(ac->match)) { + case IONIC_RX_FILTER_MATCH_MAC: + netdev_dbg(lif->netdev, "%s: rx_filter del ADDR %pM id %d\n", + __func__, ac->mac.addr, f->filter_id); + if (is_multicast_ether_addr(ac->mac.addr) && lif->nmcast) + lif->nmcast--; + else if (!is_multicast_ether_addr(ac->mac.addr) && lif->nucast) + lif->nucast--; + break; + } state = f->state; ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); ionic_rx_filter_free(lif, f); - if (is_multicast_ether_addr(addr) && lif->nmcast) - lif->nmcast--; - else if (!is_multicast_ether_addr(addr) && lif->nucast) - lif->nucast--; - spin_unlock_bh(&lif->rx_filters.lock); if (state != IONIC_FILTER_STATE_NEW) { @@ -456,6 +460,17 @@ int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } +int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) +{ + struct ionic_rx_filter_add_cmd ac = { + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC), + }; + + memcpy(&ac.mac.addr, addr, ETH_ALEN); + + return ionic_lif_filter_del(lif, &ac); +} + struct sync_item { struct list_head list; struct ionic_rx_filter f; @@ -510,7 +525,7 @@ loop_out: * they can clear room for some new filters */ list_for_each_entry_safe(sync_item, spos, &sync_del_list, list) { - (void)ionic_lif_addr_del(lif, sync_item->f.cmd.mac.addr); + (void)ionic_lif_filter_del(lif, &sync_item->f.cmd); list_del(&sync_item->list); devm_kfree(dev, sync_item); From 9b0b6ba6226e02def72e3b6985989c2d6b9c69e2 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:21 -0700 Subject: [PATCH 067/440] ionic: handle vlan id overflow Add vlans to the existing rx_filter_sync mechanics currently used for managing mac filters. Older versions of our firmware had no enforced limits on the number of vlans that the LIF could request, but requesting large numbers of vlans caused issues in FW memory management, so an arbitrary limit was added in the FW. The FW now returns -ENOSPC when it hits that limit, which the driver needs to handle. Unfortunately, the FW doesn't advertise the vlan id limit, as it does with mac filters, so the driver won't know the limit until it bumps into it. We'll grab the current vlan id count and use that as the limit from there on and thus prevent getting any more -ENOSPC errors. Just as is done for the mac filters, the device puts the device into promiscuous mode when -ENOSPC is seen for vlan ids, and the driver will track the vlans that aren't synced to the FW. When vlans are removed, the driver will retry the un-synced vlans. If all outstanding vlans are synced, the promiscuous mode will be disabled. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_lif.c | 52 +++++-------------- .../net/ethernet/pensando/ionic/ionic_lif.h | 2 + .../ethernet/pensando/ionic/ionic_rx_filter.c | 41 ++++++++++++++- .../ethernet/pensando/ionic/ionic_rx_filter.h | 2 + 4 files changed, 56 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 4a080612142a..893a80e36632 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1273,7 +1273,7 @@ void ionic_lif_rx_mode(struct ionic_lif *lif) rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; - /* sync the mac filters */ + /* sync the filters */ ionic_rx_filter_sync(lif); /* check for overflow state @@ -1284,7 +1284,8 @@ void ionic_lif_rx_mode(struct ionic_lif *lif) */ nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); - if ((lif->nucast + lif->nmcast) >= nfilters) { + if (((lif->nucast + lif->nmcast) >= nfilters) || + (lif->max_vlans && lif->nvlans >= lif->max_vlans)) { rx_mode |= IONIC_RX_MODE_F_PROMISC; rx_mode |= IONIC_RX_MODE_F_ALLMULTI; } else { @@ -1672,59 +1673,30 @@ static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_add = { - .opcode = IONIC_CMD_RX_FILTER_ADD, - .lif_index = cpu_to_le16(lif->index), - .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN), - .vlan.vlan = cpu_to_le16(vid), - }, - }; int err; - netdev_dbg(netdev, "rx_filter add VLAN %d\n", vid); - err = ionic_adminq_post_wait(lif, &ctx); + err = ionic_lif_vlan_add(lif, vid); if (err) return err; - spin_lock_bh(&lif->rx_filters.lock); - err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx, - IONIC_FILTER_STATE_SYNCED); - spin_unlock_bh(&lif->rx_filters.lock); + ionic_lif_rx_mode(lif); - return err; + return 0; } static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_del = { - .opcode = IONIC_CMD_RX_FILTER_DEL, - .lif_index = cpu_to_le16(lif->index), - }, - }; - struct ionic_rx_filter *f; + int err; - spin_lock_bh(&lif->rx_filters.lock); + err = ionic_lif_vlan_del(lif, vid); + if (err) + return err; - f = ionic_rx_filter_by_vlan(lif, vid); - if (!f) { - spin_unlock_bh(&lif->rx_filters.lock); - return -ENOENT; - } + ionic_lif_rx_mode(lif); - netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", - vid, f->filter_id); - - ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); - ionic_rx_filter_free(lif, f); - spin_unlock_bh(&lif->rx_filters.lock); - - return ionic_adminq_post_wait(lif, &ctx); + return 0; } int ionic_lif_rss_config(struct ionic_lif *lif, const u16 types, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 541aa54e4ffd..9f7ab2f17f93 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -192,6 +192,8 @@ struct ionic_lif { u16 lif_type; unsigned int nmcast; unsigned int nucast; + unsigned int nvlans; + unsigned int max_vlans; char name[IONIC_LIF_NAME_MAX_SZ]; union ionic_lif_identity *identity; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 40a12b9df982..366f15794866 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -337,8 +337,16 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, /* Don't bother with the write to FW if we know there's no room, * we can try again on the next sync attempt. + * Since the FW doesn't have a way to tell us the vlan limit, + * we start max_vlans at 0 until we hit the ENOSPC error. */ switch (le16_to_cpu(ctx.cmd.rx_filter_add.match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + netdev_dbg(lif->netdev, "%s: rx_filter add VLAN %d\n", + __func__, ctx.cmd.rx_filter_add.vlan.vlan); + if (lif->max_vlans && lif->nvlans >= lif->max_vlans) + err = -ENOSPC; + break; case IONIC_RX_FILTER_MATCH_MAC: netdev_dbg(lif->netdev, "%s: rx_filter add ADDR %pM\n", __func__, ctx.cmd.rx_filter_add.mac.addr); @@ -368,13 +376,19 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, spin_unlock_bh(&lif->rx_filters.lock); - if (err == -ENOSPC) + if (err == -ENOSPC) { + if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN) + lif->max_vlans = lif->nvlans; return 0; + } return err; } switch (le16_to_cpu(ctx.cmd.rx_filter_add.match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + lif->nvlans++; + break; case IONIC_RX_FILTER_MATCH_MAC: if (is_multicast_ether_addr(ctx.cmd.rx_filter_add.mac.addr)) lif->nmcast++; @@ -413,6 +427,16 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) return ionic_lif_filter_add(lif, &ac); } +int ionic_lif_vlan_add(struct ionic_lif *lif, const u16 vid) +{ + struct ionic_rx_filter_add_cmd ac = { + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN), + .vlan.vlan = cpu_to_le16(vid), + }; + + return ionic_lif_filter_add(lif, &ac); +} + static int ionic_lif_filter_del(struct ionic_lif *lif, struct ionic_rx_filter_add_cmd *ac) { @@ -435,6 +459,11 @@ static int ionic_lif_filter_del(struct ionic_lif *lif, } switch (le16_to_cpu(ac->match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + netdev_dbg(lif->netdev, "%s: rx_filter del VLAN %d id %d\n", + __func__, ac->vlan.vlan, f->filter_id); + lif->nvlans--; + break; case IONIC_RX_FILTER_MATCH_MAC: netdev_dbg(lif->netdev, "%s: rx_filter del ADDR %pM id %d\n", __func__, ac->mac.addr, f->filter_id); @@ -471,6 +500,16 @@ int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return ionic_lif_filter_del(lif, &ac); } +int ionic_lif_vlan_del(struct ionic_lif *lif, const u16 vid) +{ + struct ionic_rx_filter_add_cmd ac = { + .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN), + .vlan.vlan = cpu_to_le16(vid), + }; + + return ionic_lif_filter_del(lif, &ac); +} + struct sync_item { struct list_head list; struct ionic_rx_filter f; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h index a66e35f0833b..87b2666f248b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h @@ -44,5 +44,7 @@ struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif); void ionic_rx_filter_sync(struct ionic_lif *lif); int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode); int ionic_rx_filters_need_sync(struct ionic_lif *lif); +int ionic_lif_vlan_add(struct ionic_lif *lif, const u16 vid); +int ionic_lif_vlan_del(struct ionic_lif *lif, const u16 vid); #endif /* _IONIC_RX_FILTER_H_ */ From 8c9d956ab6fbd10413398eeb81f433b07c1c23d7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:22 -0700 Subject: [PATCH 068/440] ionic: allow adminq requests to override default error message The AdminQ handler has an error handler that automatically prints an error message when the request has failed. However, there are situations where the caller can expect that it might fail and has an alternative strategy, thus may not want the error message sent to the log, such as hitting -ENOSPC when adding a new vlan id. We add a new interface to the AdminQ API to allow for override of the default behavior, and an interface to the use standard error message formatting. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic.h | 7 ++- .../net/ethernet/pensando/ionic/ionic_main.c | 47 +++++++++++++------ .../net/ethernet/pensando/ionic/ionic_phc.c | 8 ++-- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index d570d03b23f6..5e25411ff02f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -70,8 +70,13 @@ struct ionic_admin_ctx { }; int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); -int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err); +int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, + const int err, const bool do_msg); int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); +int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); +void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, + u8 status, int err); + int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); int ionic_set_dma_mask(struct ionic *ionic); int ionic_setup(struct ionic *ionic); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index bb49f1b8ef67..875f4ec42efe 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -212,24 +212,28 @@ static void ionic_adminq_flush(struct ionic_lif *lif) spin_unlock_irqrestore(&lif->adminq_lock, irqflags); } +void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, + u8 status, int err) +{ + netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n", + ionic_opcode_to_str(opcode), opcode, + ionic_error_to_str(status), err); +} + static int ionic_adminq_check_err(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, - bool timeout) + const bool timeout, + const bool do_msg) { - struct net_device *netdev = lif->netdev; - const char *opcode_str; - const char *status_str; int err = 0; if (ctx->comp.comp.status || timeout) { - opcode_str = ionic_opcode_to_str(ctx->cmd.cmd.opcode); - status_str = ionic_error_to_str(ctx->comp.comp.status); err = timeout ? -ETIMEDOUT : ionic_error_to_errno(ctx->comp.comp.status); - netdev_err(netdev, "%s (%d) failed: %s (%d)\n", - opcode_str, ctx->cmd.cmd.opcode, - timeout ? "TIMEOUT" : status_str, err); + if (do_msg) + ionic_adminq_netdev_err_print(lif, ctx->cmd.cmd.opcode, + ctx->comp.comp.status, err); if (timeout) ionic_adminq_flush(lif); @@ -298,7 +302,8 @@ err_out: return err; } -int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err) +int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, + const int err, const bool do_msg) { struct net_device *netdev = lif->netdev; unsigned long time_limit; @@ -310,7 +315,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int er name = ionic_opcode_to_str(ctx->cmd.cmd.opcode); if (err) { - if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) netdev_err(netdev, "Posting of %s (%d) failed: %d\n", name, ctx->cmd.cmd.opcode, err); return err; @@ -328,8 +333,9 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int er /* interrupt the wait if FW stopped */ if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { - netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", - name, ctx->cmd.cmd.opcode); + if (do_msg) + netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", + name, ctx->cmd.cmd.opcode); return -ENXIO; } @@ -339,7 +345,9 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int er dev_dbg(lif->ionic->dev, "%s: elapsed %d msecs\n", __func__, jiffies_to_msecs(time_done - time_start)); - return ionic_adminq_check_err(lif, ctx, time_after_eq(time_done, time_limit)); + return ionic_adminq_check_err(lif, ctx, + time_after_eq(time_done, time_limit), + do_msg); } int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) @@ -348,7 +356,16 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) err = ionic_adminq_post(lif, ctx); - return ionic_adminq_wait(lif, ctx, err); + return ionic_adminq_wait(lif, ctx, err, true); +} + +int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + int err; + + err = ionic_adminq_post(lif, ctx); + + return ionic_adminq_wait(lif, ctx, err, false); } static void ionic_dev_cmd_clean(struct ionic *ionic) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index eed2db69d708..887046838b3b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -348,7 +348,7 @@ static int ionic_phc_adjfine(struct ptp_clock_info *info, long scaled_ppm) spin_unlock_irqrestore(&phc->lock, irqflags); - return ionic_adminq_wait(phc->lif, &ctx, err); + return ionic_adminq_wait(phc->lif, &ctx, err, true); } static int ionic_phc_adjtime(struct ptp_clock_info *info, s64 delta) @@ -373,7 +373,7 @@ static int ionic_phc_adjtime(struct ptp_clock_info *info, s64 delta) spin_unlock_irqrestore(&phc->lock, irqflags); - return ionic_adminq_wait(phc->lif, &ctx, err); + return ionic_adminq_wait(phc->lif, &ctx, err, true); } static int ionic_phc_settime64(struct ptp_clock_info *info, @@ -402,7 +402,7 @@ static int ionic_phc_settime64(struct ptp_clock_info *info, spin_unlock_irqrestore(&phc->lock, irqflags); - return ionic_adminq_wait(phc->lif, &ctx, err); + return ionic_adminq_wait(phc->lif, &ctx, err, true); } static int ionic_phc_gettimex64(struct ptp_clock_info *info, @@ -459,7 +459,7 @@ static long ionic_phc_aux_work(struct ptp_clock_info *info) spin_unlock_irqrestore(&phc->lock, irqflags); - ionic_adminq_wait(phc->lif, &ctx, err); + ionic_adminq_wait(phc->lif, &ctx, err, true); return phc->aux_work_delay; } From f91958cc962225bfb4d135631379a644b1e2d089 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 9 Oct 2021 11:45:23 -0700 Subject: [PATCH 069/440] ionic: tame the filter no space message Override the automatic AdminQ error message in order to capture the potential No Space message when we hit the max vlan limit, and add additional messaging to detail what filter failed. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_rx_filter.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 366f15794866..f6e785f949f9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -357,7 +357,7 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, } if (err != -ENOSPC) - err = ionic_adminq_post_wait(lif, &ctx); + err = ionic_adminq_post_wait_nomsg(lif, &ctx); spin_lock_bh(&lif->rx_filters.lock); @@ -382,6 +382,19 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, return 0; } + ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode, + ctx.comp.comp.status, err); + switch (le16_to_cpu(ctx.cmd.rx_filter_add.match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + netdev_info(lif->netdev, "rx_filter add failed: VLAN %d\n", + ctx.cmd.rx_filter_add.vlan.vlan); + break; + case IONIC_RX_FILTER_MATCH_MAC: + netdev_info(lif->netdev, "rx_filter add failed: ADDR %pM\n", + ctx.cmd.rx_filter_add.mac.addr); + break; + } + return err; } From 4b70dce2c1b93930fe4728a221a8d8e674c271c5 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Sun, 10 Oct 2021 13:03:27 +0900 Subject: [PATCH 070/440] bnxt: use netif_is_rxfh_configured instead of open code The open code which is dev->priv_flags & IFF_RXFH_CONFIGURED is defined as a helper function on netdevice.h. So use netif_is_rxfh_configured() function instead of open code. This patch doesn't change logic. Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f05f4271d4c1..800020f4d79d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6366,7 +6366,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (rx_rings != bp->rx_nr_rings) { netdev_warn(bp->dev, "Able to reserve only %d out of %d requested RX rings\n", rx_rings, bp->rx_nr_rings); - if ((bp->dev->priv_flags & IFF_RXFH_CONFIGURED) && + if (netif_is_rxfh_configured(bp->dev) && (bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) != bnxt_get_nr_rss_ctxs(bp, rx_rings) || bnxt_get_max_rss_ring(bp) >= rx_rings)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7260910e75fb..fbb56b1f70fd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -909,7 +909,7 @@ static int bnxt_set_channels(struct net_device *dev, if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && - (dev->priv_flags & IFF_RXFH_CONFIGURED)) { + netif_is_rxfh_configured(dev)) { netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n"); return -EINVAL; } From c60882a4566a0a62dc3a40c85131103aad83dcb3 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Sun, 10 Oct 2021 13:03:28 +0900 Subject: [PATCH 071/440] hv_netvsc: use netif_is_bond_master() instead of open code Use netif_is_bond_master() function instead of open code, which is ((event_dev->priv_flags & IFF_BONDING) && (event_dev->flags & IFF_MASTER)). This patch doesn't change logic. Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 479d2835a220..65850ea8a924 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2742,8 +2742,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ - if ((event_dev->priv_flags & IFF_BONDING) && - (event_dev->flags & IFF_MASTER)) + if (netif_is_bond_master(event_dev)) return NOTIFY_DONE; switch (event) { From 0199215216978b612d4e8be11e878b87bc643033 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Sun, 10 Oct 2021 13:03:29 +0900 Subject: [PATCH 072/440] mlxsw: spectrum: use netif_is_macsec() instead of open code Open code which is dev->priv_flags & IFF_MACSEC has already defined as netif_is_macsec(). So use netif_is_macsec() instead of open code. This patch doesn't change logic. Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 15f4a658e436..0723c1314ea2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5237,7 +5237,7 @@ static inline void netif_keep_dst(struct net_device *dev) static inline bool netif_reduces_vlan_mtu(struct net_device *dev) { /* TODO: reserve and use an additional IFF bit, if we get more users */ - return dev->priv_flags & IFF_MACSEC; + return netif_is_macsec(dev); } extern struct pernet_operations __net_initdata loopback_net_ops; From ba3c01ee02ed0d821c9f241f179bbc9457542b8f Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 10 Oct 2021 00:46:15 +0200 Subject: [PATCH 073/440] net: phy: at803x: fix resume for QCA8327 phy From Documentation phy resume triggers phy reset and restart auto-negotiation. Add a dedicated function to wait reset to finish as it was notice a regression where port sometime are not reliable after a suspend/resume session. The reset wait logic is copied from phy_poll_reset. Add dedicated suspend function to use genphy_suspend only with QCA8337 phy and set only additional debug settings for QCA8327. With more test it was reported that QCA8327 doesn't proprely support this mode and using this cause the unreliability of the switch ports, especially the malfunction of the port0. Fixes: 15b9df4ece17 ("net: phy: at803x: add resume/suspend function to qca83xx phy") Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 69 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index ae7e1f1c59f0..0b506af51a50 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -92,9 +92,14 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) +#define AT803X_DEBUG_REG_HIB_CTRL 0x0b +#define AT803X_DEBUG_HIB_CTRL_SEL_RST_80U BIT(10) +#define AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE BIT(13) + #define AT803X_DEBUG_REG_3C 0x3C #define AT803X_DEBUG_REG_3D 0x3D +#define AT803X_DEBUG_GATE_CLK_IN1000 BIT(6) #define AT803X_DEBUG_REG_1F 0x1F #define AT803X_DEBUG_PLL_ON BIT(2) @@ -1315,6 +1320,58 @@ static int qca83xx_config_init(struct phy_device *phydev) return 0; } +static int qca83xx_resume(struct phy_device *phydev) +{ + int ret, val; + + /* Skip reset if not suspended */ + if (!phydev->suspended) + return 0; + + /* Reinit the port, reset values set by suspend */ + qca83xx_config_init(phydev); + + /* Reset the port on port resume */ + phy_set_bits(phydev, MII_BMCR, BMCR_RESET | BMCR_ANENABLE); + + /* On resume from suspend the switch execute a reset and + * restart auto-negotiation. Wait for reset to complete. + */ + ret = phy_read_poll_timeout(phydev, MII_BMCR, val, !(val & BMCR_RESET), + 50000, 600000, true); + if (ret) + return ret; + + msleep(1); + + return 0; +} + +static int qca83xx_suspend(struct phy_device *phydev) +{ + u16 mask = 0; + + /* Only QCA8337 support actual suspend. + * QCA8327 cause port unreliability when phy suspend + * is set. + */ + if (phydev->drv->phy_id == QCA8337_PHY_ID) { + genphy_suspend(phydev); + } else { + mask |= ~(BMCR_SPEED1000 | BMCR_FULLDPLX); + phy_modify(phydev, MII_BMCR, mask, 0); + } + + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_3D, + AT803X_DEBUG_GATE_CLK_IN1000, 0); + + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL, + AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE | + AT803X_DEBUG_HIB_CTRL_SEL_RST_80U, 0); + + return 0; +} + static struct phy_driver at803x_driver[] = { { /* Qualcomm Atheros AR8035 */ @@ -1439,8 +1496,8 @@ static struct phy_driver at803x_driver[] = { .get_sset_count = at803x_get_sset_count, .get_strings = at803x_get_strings, .get_stats = at803x_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = qca83xx_suspend, + .resume = qca83xx_resume, }, { /* QCA8327-A from switch QCA8327-AL1A */ .phy_id = QCA8327_A_PHY_ID, @@ -1454,8 +1511,8 @@ static struct phy_driver at803x_driver[] = { .get_sset_count = at803x_get_sset_count, .get_strings = at803x_get_strings, .get_stats = at803x_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = qca83xx_suspend, + .resume = qca83xx_resume, }, { /* QCA8327-B from switch QCA8327-BL1A */ .phy_id = QCA8327_B_PHY_ID, @@ -1469,8 +1526,8 @@ static struct phy_driver at803x_driver[] = { .get_sset_count = at803x_get_sset_count, .get_strings = at803x_get_strings, .get_stats = at803x_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = qca83xx_suspend, + .resume = qca83xx_resume, }, }; module_phy_driver(at803x_driver); From 1ca8311949aec5c9447645731ef1c6bc5bd71350 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 10 Oct 2021 00:46:16 +0200 Subject: [PATCH 074/440] net: phy: at803x: add DAC amplitude fix for 8327 phy QCA8327 internal phy require DAC amplitude adjustement set to +6% with 100m speed. Also add additional define to report a change of the same reg in QCA8337. (different scope it does set 1000m voltage) Add link_change_notify function to set the proper amplitude adjustement on PHY_RUNNING state and disable on any other state. Fixes: b4df02b562f4 ("net: phy: at803x: add support for qca 8327 A variant internal phy") Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 0b506af51a50..df76e7f1141c 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -87,6 +87,8 @@ #define AT803X_PSSR_MR_AN_COMPLETE 0x0200 #define AT803X_DEBUG_REG_0 0x00 +#define QCA8327_DEBUG_MANU_CTRL_EN BIT(2) +#define QCA8337_DEBUG_MANU_CTRL_EN GENMASK(3, 2) #define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15) #define AT803X_DEBUG_REG_5 0x05 @@ -1317,9 +1319,37 @@ static int qca83xx_config_init(struct phy_device *phydev) break; } + /* QCA8327 require DAC amplitude adjustment for 100m set to +6%. + * Disable on init and enable only with 100m speed following + * qca original source code. + */ + if (phydev->drv->phy_id == QCA8327_A_PHY_ID || + phydev->drv->phy_id == QCA8327_B_PHY_ID) + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + QCA8327_DEBUG_MANU_CTRL_EN, 0); + return 0; } +static void qca83xx_link_change_notify(struct phy_device *phydev) +{ + /* QCA8337 doesn't require DAC Amplitude adjustement */ + if (phydev->drv->phy_id == QCA8337_PHY_ID) + return; + + /* Set DAC Amplitude adjustment to +6% for 100m on link running */ + if (phydev->state == PHY_RUNNING) { + if (phydev->speed == SPEED_100) + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + QCA8327_DEBUG_MANU_CTRL_EN, + QCA8327_DEBUG_MANU_CTRL_EN); + } else { + /* Reset DAC Amplitude adjustment */ + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + QCA8327_DEBUG_MANU_CTRL_EN, 0); + } +} + static int qca83xx_resume(struct phy_device *phydev) { int ret, val; @@ -1489,6 +1519,7 @@ static struct phy_driver at803x_driver[] = { .phy_id_mask = QCA8K_PHY_ID_MASK, .name = "Qualcomm Atheros 8337 internal PHY", /* PHY_GBIT_FEATURES */ + .link_change_notify = qca83xx_link_change_notify, .probe = at803x_probe, .flags = PHY_IS_INTERNAL, .config_init = qca83xx_config_init, @@ -1504,6 +1535,7 @@ static struct phy_driver at803x_driver[] = { .phy_id_mask = QCA8K_PHY_ID_MASK, .name = "Qualcomm Atheros 8327-A internal PHY", /* PHY_GBIT_FEATURES */ + .link_change_notify = qca83xx_link_change_notify, .probe = at803x_probe, .flags = PHY_IS_INTERNAL, .config_init = qca83xx_config_init, @@ -1519,6 +1551,7 @@ static struct phy_driver at803x_driver[] = { .phy_id_mask = QCA8K_PHY_ID_MASK, .name = "Qualcomm Atheros 8327-B internal PHY", /* PHY_GBIT_FEATURES */ + .link_change_notify = qca83xx_link_change_notify, .probe = at803x_probe, .flags = PHY_IS_INTERNAL, .config_init = qca83xx_config_init, From 9d1c29b4028557a496be9c5eb2b4b86063700636 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 10 Oct 2021 00:46:17 +0200 Subject: [PATCH 075/440] net: phy: at803x: enable prefer master for 83xx internal phy From original QCA source code the port was set to prefer master as port type in 1000BASE-T mode. Apply the same settings also here. Signed-off-by: Ansuel Smith Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index df76e7f1141c..76a1d4629b6c 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -1328,6 +1328,9 @@ static int qca83xx_config_init(struct phy_device *phydev) at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, QCA8327_DEBUG_MANU_CTRL_EN, 0); + /* Following original QCA sourcecode set port to prefer master */ + phy_set_bits(phydev, MII_CTRL1000, CTL1000_PREFER_MASTER); + return 0; } From 67999555ff42e91de7654488d9a7735bd9e84555 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 10 Oct 2021 00:46:18 +0200 Subject: [PATCH 076/440] net: phy: at803x: better describe debug regs Give a name to known debug regs from Documentation instead of using unknown hex values. Signed-off-by: Ansuel Smith Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 76a1d4629b6c..69da011e82c8 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -86,12 +86,12 @@ #define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/ #define AT803X_PSSR_MR_AN_COMPLETE 0x0200 -#define AT803X_DEBUG_REG_0 0x00 +#define AT803X_DEBUG_ANALOG_TEST_CTRL 0x00 #define QCA8327_DEBUG_MANU_CTRL_EN BIT(2) #define QCA8337_DEBUG_MANU_CTRL_EN GENMASK(3, 2) #define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15) -#define AT803X_DEBUG_REG_5 0x05 +#define AT803X_DEBUG_SYSTEM_CTRL_MODE 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) #define AT803X_DEBUG_REG_HIB_CTRL 0x0b @@ -100,7 +100,7 @@ #define AT803X_DEBUG_REG_3C 0x3C -#define AT803X_DEBUG_REG_3D 0x3D +#define AT803X_DEBUG_REG_GREEN 0x3D #define AT803X_DEBUG_GATE_CLK_IN1000 BIT(6) #define AT803X_DEBUG_REG_1F 0x1F @@ -285,25 +285,25 @@ static int at803x_read_page(struct phy_device *phydev) static int at803x_enable_rx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0, + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0, AT803X_DEBUG_RX_CLK_DLY_EN); } static int at803x_enable_tx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5, 0, + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0, AT803X_DEBUG_TX_CLK_DLY_EN); } static int at803x_disable_rx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, AT803X_DEBUG_RX_CLK_DLY_EN, 0); } static int at803x_disable_tx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5, + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, AT803X_DEBUG_TX_CLK_DLY_EN, 0); } @@ -1303,9 +1303,9 @@ static int qca83xx_config_init(struct phy_device *phydev) switch (switch_revision) { case 1: /* For 100M waveform */ - at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_0, 0x02ea); + at803x_debug_reg_write(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, 0x02ea); /* Turn on Gigabit clock */ - at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3D, 0x68a0); + at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x68a0); break; case 2: @@ -1313,8 +1313,8 @@ static int qca83xx_config_init(struct phy_device *phydev) fallthrough; case 4: phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_AZ_DEBUG, 0x803f); - at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3D, 0x6860); - at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_5, 0x2c46); + at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_GREEN, 0x6860); + at803x_debug_reg_write(phydev, AT803X_DEBUG_SYSTEM_CTRL_MODE, 0x2c46); at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3C, 0x6000); break; } @@ -1325,7 +1325,7 @@ static int qca83xx_config_init(struct phy_device *phydev) */ if (phydev->drv->phy_id == QCA8327_A_PHY_ID || phydev->drv->phy_id == QCA8327_B_PHY_ID) - at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, QCA8327_DEBUG_MANU_CTRL_EN, 0); /* Following original QCA sourcecode set port to prefer master */ @@ -1343,12 +1343,12 @@ static void qca83xx_link_change_notify(struct phy_device *phydev) /* Set DAC Amplitude adjustment to +6% for 100m on link running */ if (phydev->state == PHY_RUNNING) { if (phydev->speed == SPEED_100) - at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, QCA8327_DEBUG_MANU_CTRL_EN, QCA8327_DEBUG_MANU_CTRL_EN); } else { /* Reset DAC Amplitude adjustment */ - at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + at803x_debug_reg_mask(phydev, AT803X_DEBUG_ANALOG_TEST_CTRL, QCA8327_DEBUG_MANU_CTRL_EN, 0); } } @@ -1395,7 +1395,7 @@ static int qca83xx_suspend(struct phy_device *phydev) phy_modify(phydev, MII_BMCR, mask, 0); } - at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_3D, + at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_GREEN, AT803X_DEBUG_GATE_CLK_IN1000, 0); at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL, From c0288ae8e6bd402785360de5bbc94ded770af000 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 8 Oct 2021 16:21:03 +0200 Subject: [PATCH 077/440] net: make dev_get_port_parent_id slightly more readable Cosmetic commit making dev_get_port_parent_id slightly more readable. There is no need to split the condition to return after calling devlink_compat_switch_id_get and after that 'recurse' is always true. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1594cd2955ba..eb61a8821b3a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9165,14 +9165,11 @@ int dev_get_port_parent_id(struct net_device *dev, } err = devlink_compat_switch_id_get(dev, ppid); - if (!err || err != -EOPNOTSUPP) + if (!recurse || err != -EOPNOTSUPP) return err; - if (!recurse) - return -EOPNOTSUPP; - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = dev_get_port_parent_id(lower_dev, ppid, recurse); + err = dev_get_port_parent_id(lower_dev, ppid, true); if (err) break; if (!first.id_len) From 6213f07cb542c9651ba614e784bf58ed41354936 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sat, 9 Oct 2021 17:32:43 +0800 Subject: [PATCH 078/440] virtio_net: skip RCU read lock by checking xdp_enabled of vi networking benchmark shows that __rcu_read_lock and __rcu_read_unlock takes some cpu cycles, and we can avoid calling them partially in virtio rx path by check xdp_enabled of vi, and xdp is disabled most of time Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2ed49884565f..74d2be438180 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -734,6 +734,12 @@ static struct sk_buff *receive_small(struct net_device *dev, dev->stats.rx_length_errors++; goto err_len; } + + if (likely(!vi->xdp_enabled)) { + xdp_prog = NULL; + goto skip_xdp; + } + rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { @@ -816,6 +822,7 @@ static struct sk_buff *receive_small(struct net_device *dev, } rcu_read_unlock(); +skip_xdp: skb = build_skb(buf, buflen); if (!skb) { put_page(page); @@ -897,6 +904,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, dev->stats.rx_length_errors++; goto err_skb; } + + if (likely(!vi->xdp_enabled)) { + xdp_prog = NULL; + goto skip_xdp; + } + rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { @@ -1024,6 +1037,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } rcu_read_unlock(); +skip_xdp: head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, metasize, headroom); curr_skb = head_skb; From b9c56ccb436d1e952bb1fc0ddf752669a116b310 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 10 Oct 2021 09:01:32 +0200 Subject: [PATCH 079/440] ethernet: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Christophe JAILLET Reviewed-by: Leon Romanovsky #mlx* Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 -- drivers/net/ethernet/brocade/bna/bnad.c | 1 - drivers/net/ethernet/cavium/liquidio/lio_core.c | 1 - drivers/net/ethernet/emulex/benet/be_main.c | 1 - drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 1 - drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 -- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 1 - drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 1 - drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 -- drivers/net/ethernet/mellanox/mlx4/en_main.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 1 - drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 - 12 files changed, 15 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index b5bf5af8db5f..30d24d19f40d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1912,10 +1912,8 @@ static int xgbe_close(struct net_device *netdev) clk_disable_unprepare(pdata->ptpclk); clk_disable_unprepare(pdata->sysclk); - flush_workqueue(pdata->an_workqueue); destroy_workqueue(pdata->an_workqueue); - flush_workqueue(pdata->dev_workqueue); destroy_workqueue(pdata->dev_workqueue); set_bit(XGBE_DOWN, &pdata->dev_state); diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index b1947fd9a07c..bbdc829c3524 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3515,7 +3515,6 @@ static void bnad_uninit(struct bnad *bnad) { if (bnad->work_q) { - flush_workqueue(bnad->work_q); destroy_workqueue(bnad->work_q); bnad->work_q = NULL; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index ec7928b54e4a..73cb03266549 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -490,7 +490,6 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev) wq = &lio->rxq_status_wq[q_no]; if (wq->wq) { cancel_delayed_work_sync(&wq->wk.work); - flush_workqueue(wq->wq); destroy_workqueue(wq->wq); wq->wq = NULL; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ef60e2da05a4..736a6ea86eb1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4621,7 +4621,6 @@ static void be_destroy_err_recovery_workq(void) if (!be_err_recovery_workq) return; - flush_workqueue(be_err_recovery_workq); destroy_workqueue(be_err_recovery_workq); be_err_recovery_workq = NULL; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index d379a35c4618..186d00a9ab35 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1519,7 +1519,6 @@ static int cgx_lmac_exit(struct cgx *cgx) int i; if (cgx->cgx_cmd_workq) { - flush_workqueue(cgx->cgx_cmd_workq); destroy_workqueue(cgx->cgx_cmd_workq); cgx->cgx_cmd_workq = NULL; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 4cb24e91e648..0a1e9f6e216a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2399,7 +2399,6 @@ static void rvu_mbox_destroy(struct mbox_wq_info *mw) int devid; if (mw->mbox_wq) { - flush_workqueue(mw->mbox_wq); destroy_workqueue(mw->mbox_wq); mw->mbox_wq = NULL; } @@ -2944,7 +2943,6 @@ fail: static void rvu_flr_wq_destroy(struct rvu *rvu) { if (rvu->flr_wq) { - flush_workqueue(rvu->flr_wq); destroy_workqueue(rvu->flr_wq); rvu->flr_wq = NULL; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index c3842e454657..2ca182a4ce82 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -324,7 +324,6 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu) static void rvu_cgx_wq_destroy(struct rvu *rvu) { if (rvu->cgx_evh_wq) { - flush_workqueue(rvu->cgx_evh_wq); destroy_workqueue(rvu->cgx_evh_wq); rvu->cgx_evh_wq = NULL; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 980219a7e6fe..e6cb8cd0787d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -279,7 +279,6 @@ static void otx2vf_vfaf_mbox_destroy(struct otx2_nic *vf) struct mbox *mbox = &vf->mbox; if (vf->mbox_wq) { - flush_workqueue(vf->mbox_wq); destroy_workqueue(vf->mbox_wq); vf->mbox_wq = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 94ead263081f..e10b7b04b894 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2480,7 +2480,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) return 0; err_thread: - flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); err_slaves: while (i--) { @@ -2587,7 +2586,6 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) int i, port; if (mlx4_is_master(dev)) { - flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); for (i = 0; i < dev->num_slaves; i++) { for (port = 1; port <= MLX4_MAX_PORTS; port++) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 109472d6b61f..f1259bdb1a29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -237,7 +237,6 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) if (mdev->pndev[i]) mlx4_en_destroy_netdev(mdev->pndev[i]); - flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); (void) mlx4_mr_free(dev, &mdev->mr); iounmap(mdev->uar_map); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index f9cf9fb31547..da1bec04efff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1069,7 +1069,6 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); - flush_workqueue(tracer->work_queue); destroy_workqueue(tracer->work_queue); kvfree(tracer); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index f88f58ec2a20..8ac38828ba45 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -5224,7 +5224,6 @@ void qed_iov_wq_stop(struct qed_dev *cdev, bool schedule_first) cancel_delayed_work_sync(&cdev->hwfns[i].iov_task); } - flush_workqueue(cdev->hwfns[i].iov_wq); destroy_workqueue(cdev->hwfns[i].iov_wq); } } From 0182d0788cd66292cb1698b48dd21887d93c68ed Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sun, 10 Oct 2021 15:39:35 +0530 Subject: [PATCH 080/440] octeontx2-pf: Simplify the receive buffer size calculation This patch separates the logic of configuring hardware maximum transmit frame size and receive frame size. This simplifies the logic to calculate receive buffer size and using cqe descriptor of different size. Also additional size of skb_shared_info structure is allocated for each receive buffer pointer given to hardware which is not necessary. Hence change the size calculation to remove the size of skb_shared_info. Add a check for array out of bounds while adding fragments to the network stack. Signed-off-by: Subbaraya Sundeep Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/cn10k.c | 2 +- .../marvell/octeontx2/nic/otx2_common.c | 10 +++---- .../marvell/octeontx2/nic/otx2_common.h | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 15 +++++----- .../marvell/octeontx2/nic/otx2_txrx.c | 30 +++++++++++++------ .../marvell/octeontx2/nic/otx2_txrx.h | 4 +-- 6 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 95f21dfdba48..fd4f083c699e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -88,7 +88,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); + aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 0c89eb8a670b..66da31f30d3e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -231,7 +231,7 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) return -ENOMEM; } - req->maxlen = pfvf->max_frs; + req->maxlen = pfvf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; err = otx2_sync_mbox_msg(&pfvf->mbox); mutex_unlock(&pfvf->mbox.lock); @@ -590,7 +590,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) u64 schq, parent; u64 dwrr_val; - dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); + dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); if (!req) @@ -603,9 +603,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) /* Set topology e.t.c configuration */ if (lvl == NIX_TXSCH_LVL_SMQ) { req->reg[0] = NIX_AF_SMQX_CFG(schq); - req->regval[0] = ((pfvf->netdev->max_mtu + OTX2_ETH_HLEN) << 8) - | OTX2_MIN_MTU; - + req->regval[0] = ((u64)pfvf->tx_max_pktlen << 8) | OTX2_MIN_MTU; req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | (0x2ULL << 36); req->num_regs++; @@ -800,7 +798,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); + aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index a05f6bdac01e..61e52812983f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -326,7 +326,7 @@ struct otx2_nic { struct net_device *netdev; struct dev_hw_ops *hw_ops; void *iommu_domain; - u16 max_frs; + u16 tx_max_pktlen; u16 rbsize; /* Receive buffer size */ #define OTX2_FLAG_RX_TSTAMP_ENABLED BIT_ULL(0) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index f24e920324eb..1e0d0c9c1dac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1312,16 +1312,14 @@ static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu) * NIX transfers entire data using 6 segments/buffers and writes * a CQE_RX descriptor with those segment addresses. First segment * has additional data prepended to packet. Also software omits a - * headroom of 128 bytes and sizeof(struct skb_shared_info) in - * each segment. Hence the total size of memory needed - * to receive a packet with 'mtu' is: + * headroom of 128 bytes in each segment. Hence the total size of + * memory needed to receive a packet with 'mtu' is: * frame size = mtu + additional data; - * memory = frame_size + (headroom + struct skb_shared_info size) * 6; + * memory = frame_size + headroom * 6; * each receive buffer size = memory / 6; */ frame_size = mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; - total_size = frame_size + (OTX2_HEAD_ROOM + - OTX2_DATA_ALIGN(sizeof(struct skb_shared_info))) * 6; + total_size = frame_size + OTX2_HEAD_ROOM * 6; rbuf_size = total_size / 6; return ALIGN(rbuf_size, 2048); @@ -1343,7 +1341,8 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) hw->sqpool_cnt = hw->tot_tx_queues; hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; - pf->max_frs = pf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + /* Maximum hardware supported transmit length */ + pf->tx_max_pktlen = pf->netdev->max_mtu + OTX2_ETH_HLEN; pf->rbsize = otx2_get_rbuf_size(pf, pf->netdev->mtu); @@ -1807,7 +1806,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || - (!skb_shinfo(skb)->gso_size && skb->len > pf->max_frs)) { + (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { dev_kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 2d2b67319340..0cc6353254bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -181,8 +181,9 @@ static void otx2_set_rxtstamp(struct otx2_nic *pfvf, skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(tsns); } -static void otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, - u64 iova, int len, struct nix_rx_parse_s *parse) +static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, + u64 iova, int len, struct nix_rx_parse_s *parse, + int qidx) { struct page *page; int off = 0; @@ -203,11 +204,22 @@ static void otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, } page = virt_to_page(va); - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - va - page_address(page) + off, len - off, pfvf->rbsize); + if (likely(skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)) { + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + va - page_address(page) + off, + len - off, pfvf->rbsize); - otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, - pfvf->rbsize, DMA_FROM_DEVICE); + otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, + pfvf->rbsize, DMA_FROM_DEVICE); + return true; + } + + /* If more than MAX_SKB_FRAGS fragments are received then + * give back those buffer pointers to hardware for reuse. + */ + pfvf->hw_ops->aura_freeptr(pfvf, qidx, iova & ~0x07ULL); + + return false; } static void otx2_set_rxhash(struct otx2_nic *pfvf, @@ -349,9 +361,9 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, seg_addr = &sg->seg_addr; seg_size = (void *)sg; for (seg = 0; seg < sg->segs; seg++, seg_addr++) { - otx2_skb_add_frag(pfvf, skb, *seg_addr, seg_size[seg], - parse); - cq->pool_ptrs++; + if (otx2_skb_add_frag(pfvf, skb, *seg_addr, + seg_size[seg], parse, cq->cq_idx)) + cq->pool_ptrs++; } start += sizeof(*sg); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 5c05774a8d05..f1a04cf9210c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -39,9 +39,7 @@ #define RCV_FRAG_LEN(x) \ ((RCV_FRAG_LEN1(x) < 2048) ? 2048 : RCV_FRAG_LEN1(x)) -#define DMA_BUFFER_LEN(x) \ - ((x) - OTX2_HEAD_ROOM - \ - OTX2_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define DMA_BUFFER_LEN(x) ((x) - OTX2_HEAD_ROOM) /* IRQ triggered when NIX_LF_CINTX_CNT[ECOUNT] * is equal to this value. From ce8bd03c47fc8328e82a48d332fba69fd538e9bf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 11 Oct 2021 17:34:24 +1100 Subject: [PATCH 081/440] ethernet: sun: add missing semicolon, fix build Fix for this build problem: drivers/net/ethernet/sun/ldmvsw.c: In function 'vsw_alloc_netdev': drivers/net/ethernet/sun/ldmvsw.c:243:2: error: expected ';' before 'sprintf' sprintf(dev->name, "vif%d.%d", (int)handle, (int)port_id); ^~~~~~~ Fixes: a7639279c93c ("ethernet: sun: remove direct netdev->dev_addr writes") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211011173424.7743035d@canb.auug.org.au Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 074c5407c86b..6b59b14e74b1 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -238,7 +238,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], dev->needed_tailroom = 8; eth_hw_addr_set(dev, hwaddr); - ether_addr_copy(dev->perm_addr, dev->dev_addr) + ether_addr_copy(dev->perm_addr, dev->dev_addr); sprintf(dev->name, "vif%d.%d", (int)handle, (int)port_id); From 7715ec32472c792ca4bd170345e30fb48e191799 Mon Sep 17 00:00:00 2001 From: Grishma Kotecha Date: Fri, 6 Aug 2021 10:48:58 +0200 Subject: [PATCH 082/440] ice: implement low level recipes functions Add code to manage recipes and profiles on admin queue layer. Allow the driver to add a new recipe and update an existing one. Get a recipe and get a recipe to profile association is mostly used in update existing recipes code. Only default recipes can be updated. An update is done by reading recipes from HW, changing their params and calling add recipe command. Support following admin queue commands: - ice_aqc_opc_add_recipe (0x0290) - create a recipe with protocol header information and other details that determine how this recipe filter works - ice_aqc_opc_recipe_to_profile (0x0291) - associate a switch recipe to a profile - ice_aqc_opc_get_recipe (0x0292) - get details of an existing recipe - ice_aqc_opc_get_recipe_to_profile (0x0293) - get a recipe associated with profile ID Define ICE_AQC_RES_TYPE_RECIPE resource type to hold a switch recipe. It is needed when a new switch recipe needs to be created. Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Signed-off-by: Grishma Kotecha Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 57 +++++++ drivers/net/ethernet/intel/ice/ice_switch.c | 155 ++++++++++++++++++ 2 files changed, 212 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index d407cf8c829d..2b4437dc112f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -233,6 +233,7 @@ struct ice_aqc_get_sw_cfg_resp_elem { */ #define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 #define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 +#define ICE_AQC_RES_TYPE_RECIPE 0x05 #define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK 0x21 #define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES 0x22 #define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES 0x23 @@ -241,6 +242,7 @@ struct ice_aqc_get_sw_cfg_resp_elem { #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID 0x60 #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM 0x61 +#define ICE_AQC_RES_TYPE_FLAG_SHARED BIT(7) #define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12) #define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13) @@ -474,6 +476,53 @@ struct ice_aqc_vsi_props { #define ICE_MAX_NUM_RECIPES 64 +/* Add/Get Recipe (indirect 0x0290/0x0292) */ +struct ice_aqc_add_get_recipe { + __le16 num_sub_recipes; /* Input in Add cmd, Output in Get cmd */ + __le16 return_index; /* Input, used for Get cmd only */ + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_recipe_content { + u8 rid; +#define ICE_AQ_RECIPE_ID_IS_ROOT BIT(7) +#define ICE_AQ_SW_ID_LKUP_IDX 0 + u8 lkup_indx[5]; +#define ICE_AQ_RECIPE_LKUP_IGNORE BIT(7) +#define ICE_AQ_SW_ID_LKUP_MASK 0x00FF + __le16 mask[5]; + u8 result_indx; +#define ICE_AQ_RECIPE_RESULT_DATA_S 0 +#define ICE_AQ_RECIPE_RESULT_DATA_M (0x3F << ICE_AQ_RECIPE_RESULT_DATA_S) +#define ICE_AQ_RECIPE_RESULT_EN BIT(7) + u8 rsvd0[3]; + u8 act_ctrl_join_priority; + u8 act_ctrl_fwd_priority; + u8 act_ctrl; +#define ICE_AQ_RECIPE_ACT_INV_ACT BIT(2) + u8 rsvd1; + __le32 dflt_act; +}; + +struct ice_aqc_recipe_data_elem { + u8 recipe_indx; + u8 resp_bits; + u8 rsvd0[2]; + u8 recipe_bitmap[8]; + u8 rsvd1[4]; + struct ice_aqc_recipe_content content; + u8 rsvd2[20]; +}; + +/* Set/Get Recipes to Profile Association (direct 0x0291/0x0293) */ +struct ice_aqc_recipe_to_profile { + __le16 profile_id; + u8 rsvd[6]; + DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); +}; + /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) */ struct ice_aqc_sw_rules { @@ -1936,6 +1985,8 @@ struct ice_aq_desc { struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; + struct ice_aqc_add_get_recipe add_get_recipe; + struct ice_aqc_recipe_to_profile recipe_to_profile; struct ice_aqc_get_topo get_topo; struct ice_aqc_sched_elem_cmd sched_elem_cmd; struct ice_aqc_query_txsched_res query_sched_res; @@ -2044,6 +2095,12 @@ enum ice_adminq_opc { ice_aqc_opc_update_vsi = 0x0211, ice_aqc_opc_free_vsi = 0x0213, + /* recipe commands */ + ice_aqc_opc_add_recipe = 0x0290, + ice_aqc_opc_recipe_to_profile = 0x0291, + ice_aqc_opc_get_recipe = 0x0292, + ice_aqc_opc_get_recipe_to_profile = 0x0293, + /* switch rules population commands */ ice_aqc_opc_add_sw_rules = 0x02A0, ice_aqc_opc_update_sw_rules = 0x02A1, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 1e86a6dba454..becc4ff6b7c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -543,6 +543,161 @@ ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, return status; } +/** + * ice_aq_add_recipe - add switch recipe + * @hw: pointer to the HW struct + * @s_recipe_list: pointer to switch rule population list + * @num_recipes: number of switch recipes in the list + * @cd: pointer to command details structure or NULL + * + * Add(0x0290) + */ +static enum ice_status __maybe_unused +ice_aq_add_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 num_recipes, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_get_recipe *cmd; + struct ice_aq_desc desc; + u16 buf_size; + + cmd = &desc.params.add_get_recipe; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_recipe); + + cmd->num_sub_recipes = cpu_to_le16(num_recipes); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + buf_size = num_recipes * sizeof(*s_recipe_list); + + return ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd); +} + +/** + * ice_aq_get_recipe - get switch recipe + * @hw: pointer to the HW struct + * @s_recipe_list: pointer to switch rule population list + * @num_recipes: pointer to the number of recipes (input and output) + * @recipe_root: root recipe number of recipe(s) to retrieve + * @cd: pointer to command details structure or NULL + * + * Get(0x0292) + * + * On input, *num_recipes should equal the number of entries in s_recipe_list. + * On output, *num_recipes will equal the number of entries returned in + * s_recipe_list. + * + * The caller must supply enough space in s_recipe_list to hold all possible + * recipes and *num_recipes must equal ICE_MAX_NUM_RECIPES. + */ +static enum ice_status __maybe_unused +ice_aq_get_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_get_recipe *cmd; + struct ice_aq_desc desc; + enum ice_status status; + u16 buf_size; + + if (*num_recipes != ICE_MAX_NUM_RECIPES) + return ICE_ERR_PARAM; + + cmd = &desc.params.add_get_recipe; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe); + + cmd->return_index = cpu_to_le16(recipe_root); + cmd->num_sub_recipes = 0; + + buf_size = *num_recipes * sizeof(*s_recipe_list); + + status = ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd); + *num_recipes = le16_to_cpu(cmd->num_sub_recipes); + + return status; +} + +/** + * ice_aq_map_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with + * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Recipe to profile association (0x0291) + */ +static enum ice_status __maybe_unused +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + struct ice_sq_cd *cd) +{ + struct ice_aqc_recipe_to_profile *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.recipe_to_profile; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_recipe_to_profile); + cmd->profile_id = cpu_to_le16(profile_id); + /* Set the recipe ID bit in the bitmask to let the device know which + * profile we are associating the recipe to + */ + memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * ice_aq_get_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with + * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Associate profile ID with given recipe (0x0293) + */ +static enum ice_status __maybe_unused +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + struct ice_sq_cd *cd) +{ + struct ice_aqc_recipe_to_profile *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.recipe_to_profile; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe_to_profile); + cmd->profile_id = cpu_to_le16(profile_id); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) + memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); + + return status; +} + +/** + * ice_alloc_recipe - add recipe resource + * @hw: pointer to the hardware structure + * @rid: recipe ID returned as response to AQ call + */ +static enum ice_status __maybe_unused ice_alloc_recipe(struct ice_hw *hw, u16 *rid) +{ + struct ice_aqc_alloc_free_res_elem *sw_buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(sw_buf, elem, 1); + sw_buf = kzalloc(buf_len, GFP_KERNEL); + if (!sw_buf) + return ICE_ERR_NO_MEMORY; + + sw_buf->num_elems = cpu_to_le16(1); + sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE << + ICE_AQC_RES_TYPE_S) | + ICE_AQC_RES_TYPE_FLAG_SHARED); + status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, + ice_aqc_opc_alloc_res, NULL); + if (!status) + *rid = le16_to_cpu(sw_buf->elem[0].e.sw_resp); + kfree(sw_buf); + + return status; +} + /* ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number From 450052a4142c6bf2ec4ba6a54d833a575482a032 Mon Sep 17 00:00:00 2001 From: Dan Nowlin Date: Fri, 6 Aug 2021 10:48:59 +0200 Subject: [PATCH 083/440] ice: manage profiles and field vectors Implement functions to manage profiles and field vectors in hardware. In hardware, there are up to 256 profiles and each of these profiles can have 48 field vector words. Each field vector word is described by protocol id and offset in the packet. To add a new recipe all used profiles need to be searched. If the profile contains all required protocol ids and offsets from the recipe it can be used. The driver has to add this profile to recipe association to tell hardware that newly added recipe is going to be associated with this profile. The amount of used profiles depend on the package. To avoid searching across not used profile, max profile id value is calculated at init flow. The profile is considered as unused when all field vector words in the profile are invalid (protocol id 0xff and offset 0x1ff). Profiles are read from the package section ICE_SID_FLD_VEC_SW. Empty field vector words can be used for recipe results. Store all unused field vector words in prof_res_bm. It is a 256 elements array (max number of profiles) each element is a 48 bit bitmap (max number of field vector words). For now, support only non-tunnel profiles type. Co-developed-by: Grishma Kotecha Signed-off-by: Grishma Kotecha Signed-off-by: Dan Nowlin Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_flex_pipe.c | 242 ++++++++++++++++++ .../net/ethernet/intel/ice/ice_flex_pipe.h | 8 + .../net/ethernet/intel/ice/ice_flex_type.h | 13 + drivers/net/ethernet/intel/ice/ice_switch.c | 3 +- drivers/net/ethernet/intel/ice/ice_type.h | 3 + 5 files changed, 268 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 06ac9badee77..cd98b1d2f1a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1329,6 +1329,86 @@ fw_ddp_compat_free_alloc: return status; } +/** + * ice_sw_fv_handler + * @sect_type: section type + * @section: pointer to section + * @index: index of the field vector entry to be returned + * @offset: ptr to variable that receives the offset in the field vector table + * + * This is a callback function that can be passed to ice_pkg_enum_entry. + * This function treats the given section as of type ice_sw_fv_section and + * enumerates offset field. "offset" is an index into the field vector table. + */ +static void * +ice_sw_fv_handler(u32 sect_type, void *section, u32 index, u32 *offset) +{ + struct ice_sw_fv_section *fv_section = section; + + if (!section || sect_type != ICE_SID_FLD_VEC_SW) + return NULL; + if (index >= le16_to_cpu(fv_section->count)) + return NULL; + if (offset) + /* "index" passed in to this function is relative to a given + * 4k block. To get to the true index into the field vector + * table need to add the relative index to the base_offset + * field of this section + */ + *offset = le16_to_cpu(fv_section->base_offset) + index; + return fv_section->fv + index; +} + +/** + * ice_get_prof_index_max - get the max profile index for used profile + * @hw: pointer to the HW struct + * + * Calling this function will get the max profile index for used profile + * and store the index number in struct ice_switch_info *switch_info + * in HW for following use. + */ +static enum ice_status ice_get_prof_index_max(struct ice_hw *hw) +{ + u16 prof_index = 0, j, max_prof_index = 0; + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + bool flag = false; + struct ice_fv *fv; + u32 offset; + + memset(&state, 0, sizeof(state)); + + if (!hw->seg) + return ICE_ERR_PARAM; + + ice_seg = hw->seg; + + do { + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + if (!fv) + break; + ice_seg = NULL; + + /* in the profile that not be used, the prot_id is set to 0xff + * and the off is set to 0x1ff for all the field vectors. + */ + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv->ew[j].prot_id != ICE_PROT_INVALID || + fv->ew[j].off != ICE_FV_OFFSET_INVAL) + flag = true; + if (flag && prof_index > max_prof_index) + max_prof_index = prof_index; + + prof_index++; + flag = false; + } while (fv); + + hw->switch_info->max_used_prof_index = max_prof_index; + + return 0; +} + /** * ice_init_pkg - initialize/download package * @hw: pointer to the hardware structure @@ -1408,6 +1488,7 @@ enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) */ ice_init_pkg_regs(hw); ice_fill_blk_tbls(hw); + ice_get_prof_index_max(hw); } else { ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n", status); @@ -1484,6 +1565,167 @@ static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw) return bld; } +/** + * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type + * @hw: pointer to hardware structure + * @req_profs: type of profiles requested + * @bm: pointer to memory for returning the bitmap of field vectors + */ +void +ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs, + unsigned long *bm) +{ + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + + if (req_profs == ICE_PROF_ALL) { + bitmap_set(bm, 0, ICE_MAX_NUM_PROFILES); + return; + } + + memset(&state, 0, sizeof(state)); + bitmap_zero(bm, ICE_MAX_NUM_PROFILES); + ice_seg = hw->seg; + do { + u32 offset; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + ice_seg = NULL; + + if (fv) { + if (req_profs & ICE_PROF_NON_TUN) + set_bit((u16)offset, bm); + } + } while (fv); +} + +/** + * ice_get_sw_fv_list + * @hw: pointer to the HW structure + * @prot_ids: field vector to search for with a given protocol ID + * @ids_cnt: lookup/protocol count + * @bm: bitmap of field vectors to consider + * @fv_list: Head of a list + * + * Finds all the field vector entries from switch block that contain + * a given protocol ID and returns a list of structures of type + * "ice_sw_fv_list_entry". Every structure in the list has a field vector + * definition and profile ID information + * NOTE: The caller of the function is responsible for freeing the memory + * allocated for every list entry. + */ +enum ice_status +ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, + unsigned long *bm, struct list_head *fv_list) +{ + struct ice_sw_fv_list_entry *fvl; + struct ice_sw_fv_list_entry *tmp; + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + u32 offset; + + memset(&state, 0, sizeof(state)); + + if (!ids_cnt || !hw->seg) + return ICE_ERR_PARAM; + + ice_seg = hw->seg; + do { + u16 i; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + if (!fv) + break; + ice_seg = NULL; + + /* If field vector is not in the bitmap list, then skip this + * profile. + */ + if (!test_bit((u16)offset, bm)) + continue; + + for (i = 0; i < ids_cnt; i++) { + int j; + + /* This code assumes that if a switch field vector line + * has a matching protocol, then this line will contain + * the entries necessary to represent every field in + * that protocol header. + */ + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv->ew[j].prot_id == prot_ids[i]) + break; + if (j >= hw->blk[ICE_BLK_SW].es.fvw) + break; + if (i + 1 == ids_cnt) { + fvl = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*fvl), GFP_KERNEL); + if (!fvl) + goto err; + fvl->fv_ptr = fv; + fvl->profile_id = offset; + list_add(&fvl->list_entry, fv_list); + break; + } + } + } while (fv); + if (list_empty(fv_list)) + return ICE_ERR_CFG; + return 0; + +err: + list_for_each_entry_safe(fvl, tmp, fv_list, list_entry) { + list_del(&fvl->list_entry); + devm_kfree(ice_hw_to_dev(hw), fvl); + } + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_init_prof_result_bm - Initialize the profile result index bitmap + * @hw: pointer to hardware structure + */ +void ice_init_prof_result_bm(struct ice_hw *hw) +{ + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + + memset(&state, 0, sizeof(state)); + + if (!hw->seg) + return; + + ice_seg = hw->seg; + do { + u32 off; + u16 i; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &off, ice_sw_fv_handler); + ice_seg = NULL; + if (!fv) + break; + + bitmap_zero(hw->switch_info->prof_res_bm[off], + ICE_MAX_FV_WORDS); + + /* Determine empty field vector indices, these can be + * used for recipe results. Skip index 0, since it is + * always used for Switch ID. + */ + for (i = 1; i < ICE_MAX_FV_WORDS; i++) + if (fv->ew[i].prot_id == ICE_PROT_INVALID && + fv->ew[i].off == ICE_FV_OFFSET_INVAL) + set_bit(i, hw->switch_info->prof_res_bm[off]); + } while (fv); +} + /** * ice_pkg_buf_free * @hw: pointer to the HW structure diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 8a58e79729b9..0cf5f0c24178 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -18,6 +18,14 @@ #define ICE_PKG_CNT 4 +void +ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type, + unsigned long *bm); +void +ice_init_prof_result_bm(struct ice_hw *hw); +enum ice_status +ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, + unsigned long *bm, struct list_head *fv_list); bool ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 7d8b517a63c9..120bcebaa080 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -13,6 +13,8 @@ struct ice_fv_word { u8 resvrd; } __packed; +#define ICE_MAX_NUM_PROFILES 256 + #define ICE_MAX_FV_WORDS 48 struct ice_fv { struct ice_fv_word ew[ICE_MAX_FV_WORDS]; @@ -279,6 +281,12 @@ struct ice_sw_fv_section { struct ice_fv fv[]; }; +struct ice_sw_fv_list_entry { + struct list_head list_entry; + u32 profile_id; + struct ice_fv *fv_ptr; +}; + /* The BOOST TCAM stores the match packet header in reverse order, meaning * the fields are reversed; in addition, this means that the normally big endian * fields of the packet are now little endian. @@ -603,4 +611,9 @@ struct ice_chs_chg { }; #define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT + +enum ice_prof_type { + ICE_PROF_NON_TUN = 0x1, + ICE_PROF_ALL = 0xFF, +}; #endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index becc4ff6b7c6..dc0e79e77c49 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -59,10 +59,11 @@ enum ice_status ice_init_def_sw_recp(struct ice_hw *hw) if (!recps) return ICE_ERR_NO_MEMORY; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { recps[i].root_rid = i; INIT_LIST_HEAD(&recps[i].filt_rules); INIT_LIST_HEAD(&recps[i].filt_replay_rules); + INIT_LIST_HEAD(&recps[i].rg_list); mutex_init(&recps[i].filt_rule_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index e064439fc1a9..b2786783b80f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -677,6 +677,9 @@ struct ice_port_info { struct ice_switch_info { struct list_head vsi_list_map_head; struct ice_sw_recipe *recp_list; + u16 max_used_prof_index; + + DECLARE_BITMAP(prof_res_bm[ICE_MAX_NUM_PROFILES], ICE_MAX_FV_WORDS); }; /* FW logging configuration */ From fd2a6b71e3008360c7b26a3d7002fe03dbdc61fd Mon Sep 17 00:00:00 2001 From: Dan Nowlin Date: Fri, 6 Aug 2021 10:49:00 +0200 Subject: [PATCH 084/440] ice: create advanced switch recipe These changes introduce code for creating advanced recipes for the switch in hardware. There are a couple of recipes already defined in the HW. They apply to matching on basic protocol headers, like MAC, VLAN, MACVLAN, ethertype or direction (promiscuous), etc.. If the user wants to match on other protocol headers (eg. ip address, src/dst port etc.) or different variation of already supported protocols, there is a need to create new, more complex recipe. That new recipe is referred as 'advanced recipe', and the filtering rule created on top of that recipe is called 'advanced rule'. One recipe can have up to 5 words, but the first word is always reserved for match on switch id, so the driver can define up to 4 words for one recipe. To support recipes with more words up to 5 recipes can be chained, so 20 words can be programmed for look up. Input for adding recipe function is a list of protocols to support. Based on this list correct profile is being chosen. Correct profile means that it contains all protocol types from a list. Each profile have up to 48 field vector words and each of this word have protocol id and offset. These two fields need to match with input data for adding recipe function. If the correct profile can't be found the function returns an error. The next step after finding the correct profile is grouping words into groups. One group can have up to 4 words. This is done to simplify sending recipes to HW (because recipe also can have up to 4 words). In case of chaining (so when look up consists of more than 4 words) last recipe will always have results from the previous recipes used as words. A recipe to profile map is used to store information about which profile is associate with this recipe. This map is an array of 64 elements (max number of recipes) and each element is a 256 bits bitmap (max number of profiles) Profile to recipe map is used to store information about which recipe is associate with this profile. This map is an array of 256 elements (max number of profiles) and each element is a 64 bits bitmap (max number of recipes) Signed-off-by: Dan Nowlin Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_flex_pipe.c | 33 +- .../net/ethernet/intel/ice/ice_flex_pipe.h | 6 + .../ethernet/intel/ice/ice_protocol_type.h | 169 +++ drivers/net/ethernet/intel/ice/ice_switch.c | 1116 ++++++++++++++++- drivers/net/ethernet/intel/ice/ice_switch.h | 114 +- 5 files changed, 1414 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index cd98b1d2f1a3..5e8f3256f8ea 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -735,7 +735,7 @@ static void ice_release_global_cfg_lock(struct ice_hw *hw) * * This function will request ownership of the change lock. */ -static enum ice_status +enum ice_status ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) { return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access, @@ -748,7 +748,7 @@ ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) * * This function will release the change lock using the proper Admin Command. */ -static void ice_release_change_lock(struct ice_hw *hw) +void ice_release_change_lock(struct ice_hw *hw) { ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID); } @@ -2105,6 +2105,35 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, return 0; } +/** + * ice_find_prot_off - find prot ID and offset pair, based on prof and FV index + * @hw: pointer to the hardware structure + * @blk: hardware block + * @prof: profile ID + * @fv_idx: field vector word index + * @prot: variable to receive the protocol ID + * @off: variable to receive the protocol offset + */ +enum ice_status +ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, + u8 *prot, u16 *off) +{ + struct ice_fv_word *fv_ext; + + if (prof >= hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + if (fv_idx >= hw->blk[blk].es.fvw) + return ICE_ERR_PARAM; + + fv_ext = hw->blk[blk].es.t + (prof * hw->blk[blk].es.fvw); + + *prot = fv_ext[fv_idx].prot_id; + *off = fv_ext[fv_idx].off; + + return 0; +} + /* PTG Management */ /** diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 0cf5f0c24178..344c2637facd 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -18,6 +18,12 @@ #define ICE_PKG_CNT 4 +enum ice_status +ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access); +void ice_release_change_lock(struct ice_hw *hw); +enum ice_status +ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, + u8 *prot, u16 *off); void ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type, unsigned long *bm); diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 199aa5b71540..0b220dfa7457 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -3,6 +3,44 @@ #ifndef _ICE_PROTOCOL_TYPE_H_ #define _ICE_PROTOCOL_TYPE_H_ +#define ICE_IPV6_ADDR_LENGTH 16 + +/* Each recipe can match up to 5 different fields. Fields to match can be meta- + * data, values extracted from packet headers, or results from other recipes. + * One of the 5 fields is reserved for matching the switch ID. So, up to 4 + * recipes can provide intermediate results to another one through chaining, + * e.g. recipes 0, 1, 2, and 3 can provide intermediate results to recipe 4. + */ +#define ICE_NUM_WORDS_RECIPE 4 + +/* Max recipes that can be chained */ +#define ICE_MAX_CHAIN_RECIPE 5 + +/* 1 word reserved for switch ID from allowed 5 words. + * So a recipe can have max 4 words. And you can chain 5 such recipes + * together. So maximum words that can be programmed for look up is 5 * 4. + */ +#define ICE_MAX_CHAIN_WORDS (ICE_NUM_WORDS_RECIPE * ICE_MAX_CHAIN_RECIPE) + +/* Field vector index corresponding to chaining */ +#define ICE_CHAIN_FV_INDEX_START 47 + +enum ice_protocol_type { + ICE_MAC_OFOS = 0, + ICE_MAC_IL, + ICE_ETYPE_OL, + ICE_VLAN_OFOS, + ICE_IPV4_OFOS, + ICE_IPV4_IL, + ICE_IPV6_OFOS, + ICE_IPV6_IL, + ICE_TCP_IL, + ICE_UDP_OF, + ICE_UDP_ILOS, + ICE_SCTP_IL, + ICE_PROTOCOL_LAST +}; + /* Decoders for ice_prot_id: * - F: First * - I: Inner @@ -35,4 +73,135 @@ enum ice_prot_id { ICE_PROT_META_ID = 255, /* when offset == metadata */ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; + +#define ICE_MAC_OFOS_HW 1 +#define ICE_MAC_IL_HW 4 +#define ICE_ETYPE_OL_HW 9 +#define ICE_VLAN_OF_HW 16 +#define ICE_VLAN_OL_HW 17 +#define ICE_IPV4_OFOS_HW 32 +#define ICE_IPV4_IL_HW 33 +#define ICE_IPV6_OFOS_HW 40 +#define ICE_IPV6_IL_HW 41 +#define ICE_TCP_IL_HW 49 +#define ICE_UDP_ILOS_HW 53 + +#define ICE_UDP_OF_HW 52 /* UDP Tunnels */ + +#define ICE_TUN_FLAG_FV_IND 2 + +/* Mapping of software defined protocol ID to hardware defined protocol ID */ +struct ice_protocol_entry { + enum ice_protocol_type type; + u8 protocol_id; +}; + +struct ice_ether_hdr { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; +}; + +struct ice_ethtype_hdr { + __be16 ethtype_id; +}; + +struct ice_ether_vlan_hdr { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 vlan_id; +}; + +struct ice_vlan_hdr { + __be16 type; + __be16 vlan; +}; + +struct ice_ipv4_hdr { + u8 version; + u8 tos; + __be16 total_length; + __be16 id; + __be16 frag_off; + u8 time_to_live; + u8 protocol; + __be16 check; + __be32 src_addr; + __be32 dst_addr; +}; + +struct ice_ipv6_hdr { + __be32 be_ver_tc_flow; + __be16 payload_len; + u8 next_hdr; + u8 hop_limit; + u8 src_addr[ICE_IPV6_ADDR_LENGTH]; + u8 dst_addr[ICE_IPV6_ADDR_LENGTH]; +}; + +struct ice_sctp_hdr { + __be16 src_port; + __be16 dst_port; + __be32 verification_tag; + __be32 check; +}; + +struct ice_l4_hdr { + __be16 src_port; + __be16 dst_port; + __be16 len; + __be16 check; +}; + +union ice_prot_hdr { + struct ice_ether_hdr eth_hdr; + struct ice_ethtype_hdr ethertype; + struct ice_vlan_hdr vlan_hdr; + struct ice_ipv4_hdr ipv4_hdr; + struct ice_ipv6_hdr ipv6_hdr; + struct ice_l4_hdr l4_hdr; + struct ice_sctp_hdr sctp_hdr; +}; + +/* This is mapping table entry that maps every word within a given protocol + * structure to the real byte offset as per the specification of that + * protocol header. + * for e.g. dst address is 3 words in ethertype header and corresponding bytes + * are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8 + */ +struct ice_prot_ext_tbl_entry { + enum ice_protocol_type prot_type; + /* Byte offset into header of given protocol type */ + u8 offs[sizeof(union ice_prot_hdr)]; +}; + +/* Extractions to be looked up for a given recipe */ +struct ice_prot_lkup_ext { + u16 prot_type; + u8 n_val_words; + /* create a buffer to hold max words per recipe */ + u16 field_off[ICE_MAX_CHAIN_WORDS]; + u16 field_mask[ICE_MAX_CHAIN_WORDS]; + + struct ice_fv_word fv_words[ICE_MAX_CHAIN_WORDS]; + + /* Indicate field offsets that have field vector indices assigned */ + DECLARE_BITMAP(done, ICE_MAX_CHAIN_WORDS); +}; + +struct ice_pref_recipe_group { + u8 n_val_pairs; /* Number of valid pairs */ + struct ice_fv_word pairs[ICE_NUM_WORDS_RECIPE]; + u16 mask[ICE_NUM_WORDS_RECIPE]; +}; + +struct ice_recp_grp_entry { + struct list_head l_entry; + +#define ICE_INVAL_CHAIN_IND 0xFF + u16 rid; + u8 chain_idx; + u16 fv_idx[ICE_NUM_WORDS_RECIPE]; + u16 fv_mask[ICE_NUM_WORDS_RECIPE]; + struct ice_pref_recipe_group r_group; +}; #endif /* _ICE_PROTOCOL_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index dc0e79e77c49..eb7cd1a955f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -42,6 +42,14 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, (offsetof(struct ice_aqc_sw_rules_elem, pdata.vsi_list.vsi) + \ ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi[0]))) +/* this is a recipe to profile association bitmap */ +static DECLARE_BITMAP(recipe_to_profile[ICE_MAX_NUM_RECIPES], + ICE_MAX_NUM_PROFILES); + +/* this is a profile to recipe association bitmap */ +static DECLARE_BITMAP(profile_to_recipe[ICE_MAX_NUM_PROFILES], + ICE_MAX_NUM_RECIPES); + /** * ice_init_def_sw_recp - initialize the recipe book keeping tables * @hw: pointer to the HW struct @@ -553,7 +561,7 @@ ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, * * Add(0x0290) */ -static enum ice_status __maybe_unused +static enum ice_status ice_aq_add_recipe(struct ice_hw *hw, struct ice_aqc_recipe_data_elem *s_recipe_list, u16 num_recipes, struct ice_sq_cd *cd) @@ -590,7 +598,7 @@ ice_aq_add_recipe(struct ice_hw *hw, * The caller must supply enough space in s_recipe_list to hold all possible * recipes and *num_recipes must equal ICE_MAX_NUM_RECIPES. */ -static enum ice_status __maybe_unused +static enum ice_status ice_aq_get_recipe(struct ice_hw *hw, struct ice_aqc_recipe_data_elem *s_recipe_list, u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd) @@ -625,7 +633,7 @@ ice_aq_get_recipe(struct ice_hw *hw, * @cd: pointer to command details structure or NULL * Recipe to profile association (0x0291) */ -static enum ice_status __maybe_unused +static enum ice_status ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, struct ice_sq_cd *cd) { @@ -651,7 +659,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, * @cd: pointer to command details structure or NULL * Associate profile ID with given recipe (0x0293) */ -static enum ice_status __maybe_unused +static enum ice_status ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, struct ice_sq_cd *cd) { @@ -675,7 +683,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, * @hw: pointer to the hardware structure * @rid: recipe ID returned as response to AQ call */ -static enum ice_status __maybe_unused ice_alloc_recipe(struct ice_hw *hw, u16 *rid) +static enum ice_status ice_alloc_recipe(struct ice_hw *hw, u16 *rid) { struct ice_aqc_alloc_free_res_elem *sw_buf; enum ice_status status; @@ -699,6 +707,203 @@ static enum ice_status __maybe_unused ice_alloc_recipe(struct ice_hw *hw, u16 *r return status; } +/** + * ice_get_recp_to_prof_map - updates recipe to profile mapping + * @hw: pointer to hardware structure + * + * This function is used to populate recipe_to_profile matrix where index to + * this array is the recipe ID and the element is the mapping of which profiles + * is this recipe mapped to. + */ +static void ice_get_recp_to_prof_map(struct ice_hw *hw) +{ + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u16 i; + + for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { + u16 j; + + bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); + bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); + if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) + continue; + bitmap_copy(profile_to_recipe[i], r_bitmap, + ICE_MAX_NUM_RECIPES); + for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) + set_bit(i, recipe_to_profile[j]); + } +} + +/** + * ice_collect_result_idx - copy result index values + * @buf: buffer that contains the result index + * @recp: the recipe struct to copy data into + */ +static void +ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf, + struct ice_sw_recipe *recp) +{ + if (buf->content.result_indx & ICE_AQ_RECIPE_RESULT_EN) + set_bit(buf->content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN, + recp->res_idxs); +} + +/** + * ice_get_recp_frm_fw - update SW bookkeeping from FW recipe entries + * @hw: pointer to hardware structure + * @recps: struct that we need to populate + * @rid: recipe ID that we are populating + * @refresh_required: true if we should get recipe to profile mapping from FW + * + * This function is used to populate all the necessary entries into our + * bookkeeping so that we have a current list of all the recipes that are + * programmed in the firmware. + */ +static enum ice_status +ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, + bool *refresh_required) +{ + DECLARE_BITMAP(result_bm, ICE_MAX_FV_WORDS); + struct ice_aqc_recipe_data_elem *tmp; + u16 num_recps = ICE_MAX_NUM_RECIPES; + struct ice_prot_lkup_ext *lkup_exts; + enum ice_status status; + u8 fv_word_idx = 0; + u16 sub_recps; + + bitmap_zero(result_bm, ICE_MAX_FV_WORDS); + + /* we need a buffer big enough to accommodate all the recipes */ + tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + tmp[0].recipe_indx = rid; + status = ice_aq_get_recipe(hw, tmp, &num_recps, rid, NULL); + /* non-zero status meaning recipe doesn't exist */ + if (status) + goto err_unroll; + + /* Get recipe to profile map so that we can get the fv from lkups that + * we read for a recipe from FW. Since we want to minimize the number of + * times we make this FW call, just make one call and cache the copy + * until a new recipe is added. This operation is only required the + * first time to get the changes from FW. Then to search existing + * entries we don't need to update the cache again until another recipe + * gets added. + */ + if (*refresh_required) { + ice_get_recp_to_prof_map(hw); + *refresh_required = false; + } + + /* Start populating all the entries for recps[rid] based on lkups from + * firmware. Note that we are only creating the root recipe in our + * database. + */ + lkup_exts = &recps[rid].lkup_exts; + + for (sub_recps = 0; sub_recps < num_recps; sub_recps++) { + struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps]; + struct ice_recp_grp_entry *rg_entry; + u8 i, prof, idx, prot = 0; + bool is_root; + u16 off = 0; + + rg_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rg_entry), + GFP_KERNEL); + if (!rg_entry) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll; + } + + idx = root_bufs.recipe_indx; + is_root = root_bufs.content.rid & ICE_AQ_RECIPE_ID_IS_ROOT; + + /* Mark all result indices in this chain */ + if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) + set_bit(root_bufs.content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN, + result_bm); + + /* get the first profile that is associated with rid */ + prof = find_first_bit(recipe_to_profile[idx], + ICE_MAX_NUM_PROFILES); + for (i = 0; i < ICE_NUM_WORDS_RECIPE; i++) { + u8 lkup_indx = root_bufs.content.lkup_indx[i + 1]; + + rg_entry->fv_idx[i] = lkup_indx; + rg_entry->fv_mask[i] = + le16_to_cpu(root_bufs.content.mask[i + 1]); + + /* If the recipe is a chained recipe then all its + * child recipe's result will have a result index. + * To fill fv_words we should not use those result + * index, we only need the protocol ids and offsets. + * We will skip all the fv_idx which stores result + * index in them. We also need to skip any fv_idx which + * has ICE_AQ_RECIPE_LKUP_IGNORE or 0 since it isn't a + * valid offset value. + */ + if (test_bit(rg_entry->fv_idx[i], hw->switch_info->prof_res_bm[prof]) || + rg_entry->fv_idx[i] & ICE_AQ_RECIPE_LKUP_IGNORE || + rg_entry->fv_idx[i] == 0) + continue; + + ice_find_prot_off(hw, ICE_BLK_SW, prof, + rg_entry->fv_idx[i], &prot, &off); + lkup_exts->fv_words[fv_word_idx].prot_id = prot; + lkup_exts->fv_words[fv_word_idx].off = off; + lkup_exts->field_mask[fv_word_idx] = + rg_entry->fv_mask[i]; + fv_word_idx++; + } + /* populate rg_list with the data from the child entry of this + * recipe + */ + list_add(&rg_entry->l_entry, &recps[rid].rg_list); + + /* Propagate some data to the recipe database */ + recps[idx].is_root = !!is_root; + recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority; + bitmap_zero(recps[idx].res_idxs, ICE_MAX_FV_WORDS); + if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) { + recps[idx].chain_idx = root_bufs.content.result_indx & + ~ICE_AQ_RECIPE_RESULT_EN; + set_bit(recps[idx].chain_idx, recps[idx].res_idxs); + } else { + recps[idx].chain_idx = ICE_INVAL_CHAIN_IND; + } + + if (!is_root) + continue; + + /* Only do the following for root recipes entries */ + memcpy(recps[idx].r_bitmap, root_bufs.recipe_bitmap, + sizeof(recps[idx].r_bitmap)); + recps[idx].root_rid = root_bufs.content.rid & + ~ICE_AQ_RECIPE_ID_IS_ROOT; + recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority; + } + + /* Complete initialization of the root recipe entry */ + lkup_exts->n_val_words = fv_word_idx; + recps[rid].big_recp = (num_recps > 1); + recps[rid].n_grp_count = (u8)num_recps; + recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp, + recps[rid].n_grp_count * sizeof(*recps[rid].root_buf), + GFP_KERNEL); + if (!recps[rid].root_buf) + goto err_unroll; + + /* Copy result indexes */ + bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS); + recps[rid].recp_created = true; + +err_unroll: + kfree(tmp); + return status; +} + /* ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number @@ -2929,6 +3134,907 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, return status; } +/* This is mapping table entry that maps every word within a given protocol + * structure to the real byte offset as per the specification of that + * protocol header. + * for example dst address is 3 words in ethertype header and corresponding + * bytes are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8 + * IMPORTANT: Every structure part of "ice_prot_hdr" union should have a + * matching entry describing its field. This needs to be updated if new + * structure is added to that union. + */ +static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { + { ICE_MAC_OFOS, { 0, 2, 4, 6, 8, 10, 12 } }, + { ICE_MAC_IL, { 0, 2, 4, 6, 8, 10, 12 } }, + { ICE_ETYPE_OL, { 0 } }, + { ICE_VLAN_OFOS, { 2, 0 } }, + { ICE_IPV4_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, + { ICE_IPV4_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, + { ICE_IPV6_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 26, 28, 30, 32, 34, 36, 38 } }, + { ICE_IPV6_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 26, 28, 30, 32, 34, 36, 38 } }, + { ICE_TCP_IL, { 0, 2 } }, + { ICE_UDP_OF, { 0, 2 } }, + { ICE_UDP_ILOS, { 0, 2 } }, +}; + +static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { + { ICE_MAC_OFOS, ICE_MAC_OFOS_HW }, + { ICE_MAC_IL, ICE_MAC_IL_HW }, + { ICE_ETYPE_OL, ICE_ETYPE_OL_HW }, + { ICE_VLAN_OFOS, ICE_VLAN_OL_HW }, + { ICE_IPV4_OFOS, ICE_IPV4_OFOS_HW }, + { ICE_IPV4_IL, ICE_IPV4_IL_HW }, + { ICE_IPV6_OFOS, ICE_IPV6_OFOS_HW }, + { ICE_IPV6_IL, ICE_IPV6_IL_HW }, + { ICE_TCP_IL, ICE_TCP_IL_HW }, + { ICE_UDP_OF, ICE_UDP_OF_HW }, + { ICE_UDP_ILOS, ICE_UDP_ILOS_HW }, +}; + +/** + * ice_find_recp - find a recipe + * @hw: pointer to the hardware structure + * @lkup_exts: extension sequence to match + * + * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found. + */ +static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) +{ + bool refresh_required = true; + struct ice_sw_recipe *recp; + u8 i; + + /* Walk through existing recipes to find a match */ + recp = hw->switch_info->recp_list; + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { + /* If recipe was not created for this ID, in SW bookkeeping, + * check if FW has an entry for this recipe. If the FW has an + * entry update it in our SW bookkeeping and continue with the + * matching. + */ + if (!recp[i].recp_created) + if (ice_get_recp_frm_fw(hw, + hw->switch_info->recp_list, i, + &refresh_required)) + continue; + + /* Skip inverse action recipes */ + if (recp[i].root_buf && recp[i].root_buf->content.act_ctrl & + ICE_AQ_RECIPE_ACT_INV_ACT) + continue; + + /* if number of words we are looking for match */ + if (lkup_exts->n_val_words == recp[i].lkup_exts.n_val_words) { + struct ice_fv_word *ar = recp[i].lkup_exts.fv_words; + struct ice_fv_word *be = lkup_exts->fv_words; + u16 *cr = recp[i].lkup_exts.field_mask; + u16 *de = lkup_exts->field_mask; + bool found = true; + u8 pe, qr; + + /* ar, cr, and qr are related to the recipe words, while + * be, de, and pe are related to the lookup words + */ + for (pe = 0; pe < lkup_exts->n_val_words; pe++) { + for (qr = 0; qr < recp[i].lkup_exts.n_val_words; + qr++) { + if (ar[qr].off == be[pe].off && + ar[qr].prot_id == be[pe].prot_id && + cr[qr] == de[pe]) + /* Found the "pe"th word in the + * given recipe + */ + break; + } + /* After walking through all the words in the + * "i"th recipe if "p"th word was not found then + * this recipe is not what we are looking for. + * So break out from this loop and try the next + * recipe + */ + if (qr >= recp[i].lkup_exts.n_val_words) { + found = false; + break; + } + } + /* If for "i"th recipe the found was never set to false + * then it means we found our match + */ + if (found) + return i; /* Return the recipe ID */ + } + } + return ICE_MAX_NUM_RECIPES; +} + +/** + * ice_prot_type_to_id - get protocol ID from protocol type + * @type: protocol type + * @id: pointer to variable that will receive the ID + * + * Returns true if found, false otherwise + */ +static bool ice_prot_type_to_id(enum ice_protocol_type type, u8 *id) +{ + u8 i; + + for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++) + if (ice_prot_id_tbl[i].type == type) { + *id = ice_prot_id_tbl[i].protocol_id; + return true; + } + return false; +} + +/** + * ice_fill_valid_words - count valid words + * @rule: advanced rule with lookup information + * @lkup_exts: byte offset extractions of the words that are valid + * + * calculate valid words in a lookup rule using mask value + */ +static u8 +ice_fill_valid_words(struct ice_adv_lkup_elem *rule, + struct ice_prot_lkup_ext *lkup_exts) +{ + u8 j, word, prot_id, ret_val; + + if (!ice_prot_type_to_id(rule->type, &prot_id)) + return 0; + + word = lkup_exts->n_val_words; + + for (j = 0; j < sizeof(rule->m_u) / sizeof(u16); j++) + if (((u16 *)&rule->m_u)[j] && + rule->type < ARRAY_SIZE(ice_prot_ext)) { + /* No more space to accommodate */ + if (word >= ICE_MAX_CHAIN_WORDS) + return 0; + lkup_exts->fv_words[word].off = + ice_prot_ext[rule->type].offs[j]; + lkup_exts->fv_words[word].prot_id = + ice_prot_id_tbl[rule->type].protocol_id; + lkup_exts->field_mask[word] = + be16_to_cpu(((__force __be16 *)&rule->m_u)[j]); + word++; + } + + ret_val = word - lkup_exts->n_val_words; + lkup_exts->n_val_words = word; + + return ret_val; +} + +/** + * ice_create_first_fit_recp_def - Create a recipe grouping + * @hw: pointer to the hardware structure + * @lkup_exts: an array of protocol header extractions + * @rg_list: pointer to a list that stores new recipe groups + * @recp_cnt: pointer to a variable that stores returned number of recipe groups + * + * Using first fit algorithm, take all the words that are still not done + * and start grouping them in 4-word groups. Each group makes up one + * recipe. + */ +static enum ice_status +ice_create_first_fit_recp_def(struct ice_hw *hw, + struct ice_prot_lkup_ext *lkup_exts, + struct list_head *rg_list, + u8 *recp_cnt) +{ + struct ice_pref_recipe_group *grp = NULL; + u8 j; + + *recp_cnt = 0; + + /* Walk through every word in the rule to check if it is not done. If so + * then this word needs to be part of a new recipe. + */ + for (j = 0; j < lkup_exts->n_val_words; j++) + if (!test_bit(j, lkup_exts->done)) { + if (!grp || + grp->n_val_pairs == ICE_NUM_WORDS_RECIPE) { + struct ice_recp_grp_entry *entry; + + entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*entry), + GFP_KERNEL); + if (!entry) + return ICE_ERR_NO_MEMORY; + list_add(&entry->l_entry, rg_list); + grp = &entry->r_group; + (*recp_cnt)++; + } + + grp->pairs[grp->n_val_pairs].prot_id = + lkup_exts->fv_words[j].prot_id; + grp->pairs[grp->n_val_pairs].off = + lkup_exts->fv_words[j].off; + grp->mask[grp->n_val_pairs] = lkup_exts->field_mask[j]; + grp->n_val_pairs++; + } + + return 0; +} + +/** + * ice_fill_fv_word_index - fill in the field vector indices for a recipe group + * @hw: pointer to the hardware structure + * @fv_list: field vector with the extraction sequence information + * @rg_list: recipe groupings with protocol-offset pairs + * + * Helper function to fill in the field vector indices for protocol-offset + * pairs. These indexes are then ultimately programmed into a recipe. + */ +static enum ice_status +ice_fill_fv_word_index(struct ice_hw *hw, struct list_head *fv_list, + struct list_head *rg_list) +{ + struct ice_sw_fv_list_entry *fv; + struct ice_recp_grp_entry *rg; + struct ice_fv_word *fv_ext; + + if (list_empty(fv_list)) + return 0; + + fv = list_first_entry(fv_list, struct ice_sw_fv_list_entry, + list_entry); + fv_ext = fv->fv_ptr->ew; + + list_for_each_entry(rg, rg_list, l_entry) { + u8 i; + + for (i = 0; i < rg->r_group.n_val_pairs; i++) { + struct ice_fv_word *pr; + bool found = false; + u16 mask; + u8 j; + + pr = &rg->r_group.pairs[i]; + mask = rg->r_group.mask[i]; + + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv_ext[j].prot_id == pr->prot_id && + fv_ext[j].off == pr->off) { + found = true; + + /* Store index of field vector */ + rg->fv_idx[i] = j; + rg->fv_mask[i] = mask; + break; + } + + /* Protocol/offset could not be found, caller gave an + * invalid pair + */ + if (!found) + return ICE_ERR_PARAM; + } + } + + return 0; +} + +/** + * ice_find_free_recp_res_idx - find free result indexes for recipe + * @hw: pointer to hardware structure + * @profiles: bitmap of profiles that will be associated with the new recipe + * @free_idx: pointer to variable to receive the free index bitmap + * + * The algorithm used here is: + * 1. When creating a new recipe, create a set P which contains all + * Profiles that will be associated with our new recipe + * + * 2. For each Profile p in set P: + * a. Add all recipes associated with Profile p into set R + * b. Optional : PossibleIndexes &= profile[p].possibleIndexes + * [initially PossibleIndexes should be 0xFFFFFFFFFFFFFFFF] + * i. Or just assume they all have the same possible indexes: + * 44, 45, 46, 47 + * i.e., PossibleIndexes = 0x0000F00000000000 + * + * 3. For each Recipe r in set R: + * a. UsedIndexes |= (bitwise or ) recipe[r].res_indexes + * b. FreeIndexes = UsedIndexes ^ PossibleIndexes + * + * FreeIndexes will contain the bits indicating the indexes free for use, + * then the code needs to update the recipe[r].used_result_idx_bits to + * indicate which indexes were selected for use by this recipe. + */ +static u16 +ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles, + unsigned long *free_idx) +{ + DECLARE_BITMAP(possible_idx, ICE_MAX_FV_WORDS); + DECLARE_BITMAP(recipes, ICE_MAX_NUM_RECIPES); + DECLARE_BITMAP(used_idx, ICE_MAX_FV_WORDS); + u16 bit; + + bitmap_zero(possible_idx, ICE_MAX_FV_WORDS); + bitmap_zero(recipes, ICE_MAX_NUM_RECIPES); + bitmap_zero(used_idx, ICE_MAX_FV_WORDS); + bitmap_zero(free_idx, ICE_MAX_FV_WORDS); + + bitmap_set(possible_idx, 0, ICE_MAX_FV_WORDS); + + /* For each profile we are going to associate the recipe with, add the + * recipes that are associated with that profile. This will give us + * the set of recipes that our recipe may collide with. Also, determine + * what possible result indexes are usable given this set of profiles. + */ + for_each_set_bit(bit, profiles, ICE_MAX_NUM_PROFILES) { + bitmap_or(recipes, recipes, profile_to_recipe[bit], + ICE_MAX_NUM_RECIPES); + bitmap_and(possible_idx, possible_idx, + hw->switch_info->prof_res_bm[bit], + ICE_MAX_FV_WORDS); + } + + /* For each recipe that our new recipe may collide with, determine + * which indexes have been used. + */ + for_each_set_bit(bit, recipes, ICE_MAX_NUM_RECIPES) + bitmap_or(used_idx, used_idx, + hw->switch_info->recp_list[bit].res_idxs, + ICE_MAX_FV_WORDS); + + bitmap_xor(free_idx, used_idx, possible_idx, ICE_MAX_FV_WORDS); + + /* return number of free indexes */ + return (u16)bitmap_weight(free_idx, ICE_MAX_FV_WORDS); +} + +/** + * ice_add_sw_recipe - function to call AQ calls to create switch recipe + * @hw: pointer to hardware structure + * @rm: recipe management list entry + * @match_tun_mask: tunnel mask that needs to be programmed + * @profiles: bitmap of profiles that will be associated. + */ +static enum ice_status +ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, + u16 match_tun_mask, unsigned long *profiles) +{ + DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS); + struct ice_aqc_recipe_data_elem *tmp; + struct ice_aqc_recipe_data_elem *buf; + struct ice_recp_grp_entry *entry; + enum ice_status status; + u16 free_res_idx; + u16 recipe_count; + u8 chain_idx; + u8 recps = 0; + + /* When more than one recipe are required, another recipe is needed to + * chain them together. Matching a tunnel metadata ID takes up one of + * the match fields in the chaining recipe reducing the number of + * chained recipes by one. + */ + /* check number of free result indices */ + bitmap_zero(result_idx_bm, ICE_MAX_FV_WORDS); + free_res_idx = ice_find_free_recp_res_idx(hw, profiles, result_idx_bm); + + ice_debug(hw, ICE_DBG_SW, "Result idx slots: %d, need %d\n", + free_res_idx, rm->n_grp_count); + + if (rm->n_grp_count > 1) { + if (rm->n_grp_count > free_res_idx) + return ICE_ERR_MAX_LIMIT; + + rm->n_grp_count++; + } + + if (rm->n_grp_count > ICE_MAX_CHAIN_RECIPE) + return ICE_ERR_MAX_LIMIT; + + tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + buf = devm_kcalloc(ice_hw_to_dev(hw), rm->n_grp_count, sizeof(*buf), + GFP_KERNEL); + if (!buf) { + status = ICE_ERR_NO_MEMORY; + goto err_mem; + } + + bitmap_zero(rm->r_bitmap, ICE_MAX_NUM_RECIPES); + recipe_count = ICE_MAX_NUM_RECIPES; + status = ice_aq_get_recipe(hw, tmp, &recipe_count, ICE_SW_LKUP_MAC, + NULL); + if (status || recipe_count == 0) + goto err_unroll; + + /* Allocate the recipe resources, and configure them according to the + * match fields from protocol headers and extracted field vectors. + */ + chain_idx = find_first_bit(result_idx_bm, ICE_MAX_FV_WORDS); + list_for_each_entry(entry, &rm->rg_list, l_entry) { + u8 i; + + status = ice_alloc_recipe(hw, &entry->rid); + if (status) + goto err_unroll; + + /* Clear the result index of the located recipe, as this will be + * updated, if needed, later in the recipe creation process. + */ + tmp[0].content.result_indx = 0; + + buf[recps] = tmp[0]; + buf[recps].recipe_indx = (u8)entry->rid; + /* if the recipe is a non-root recipe RID should be programmed + * as 0 for the rules to be applied correctly. + */ + buf[recps].content.rid = 0; + memset(&buf[recps].content.lkup_indx, 0, + sizeof(buf[recps].content.lkup_indx)); + + /* All recipes use look-up index 0 to match switch ID. */ + buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX; + buf[recps].content.mask[0] = + cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK); + /* Setup lkup_indx 1..4 to INVALID/ignore and set the mask + * to be 0 + */ + for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) { + buf[recps].content.lkup_indx[i] = 0x80; + buf[recps].content.mask[i] = 0; + } + + for (i = 0; i < entry->r_group.n_val_pairs; i++) { + buf[recps].content.lkup_indx[i + 1] = entry->fv_idx[i]; + buf[recps].content.mask[i + 1] = + cpu_to_le16(entry->fv_mask[i]); + } + + if (rm->n_grp_count > 1) { + /* Checks to see if there really is a valid result index + * that can be used. + */ + if (chain_idx >= ICE_MAX_FV_WORDS) { + ice_debug(hw, ICE_DBG_SW, "No chain index available\n"); + status = ICE_ERR_MAX_LIMIT; + goto err_unroll; + } + + entry->chain_idx = chain_idx; + buf[recps].content.result_indx = + ICE_AQ_RECIPE_RESULT_EN | + ((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) & + ICE_AQ_RECIPE_RESULT_DATA_M); + clear_bit(chain_idx, result_idx_bm); + chain_idx = find_first_bit(result_idx_bm, + ICE_MAX_FV_WORDS); + } + + /* fill recipe dependencies */ + bitmap_zero((unsigned long *)buf[recps].recipe_bitmap, + ICE_MAX_NUM_RECIPES); + set_bit(buf[recps].recipe_indx, + (unsigned long *)buf[recps].recipe_bitmap); + buf[recps].content.act_ctrl_fwd_priority = rm->priority; + recps++; + } + + if (rm->n_grp_count == 1) { + rm->root_rid = buf[0].recipe_indx; + set_bit(buf[0].recipe_indx, rm->r_bitmap); + buf[0].content.rid = rm->root_rid | ICE_AQ_RECIPE_ID_IS_ROOT; + if (sizeof(buf[0].recipe_bitmap) >= sizeof(rm->r_bitmap)) { + memcpy(buf[0].recipe_bitmap, rm->r_bitmap, + sizeof(buf[0].recipe_bitmap)); + } else { + status = ICE_ERR_BAD_PTR; + goto err_unroll; + } + /* Applicable only for ROOT_RECIPE, set the fwd_priority for + * the recipe which is getting created if specified + * by user. Usually any advanced switch filter, which results + * into new extraction sequence, ended up creating a new recipe + * of type ROOT and usually recipes are associated with profiles + * Switch rule referreing newly created recipe, needs to have + * either/or 'fwd' or 'join' priority, otherwise switch rule + * evaluation will not happen correctly. In other words, if + * switch rule to be evaluated on priority basis, then recipe + * needs to have priority, otherwise it will be evaluated last. + */ + buf[0].content.act_ctrl_fwd_priority = rm->priority; + } else { + struct ice_recp_grp_entry *last_chain_entry; + u16 rid, i; + + /* Allocate the last recipe that will chain the outcomes of the + * other recipes together + */ + status = ice_alloc_recipe(hw, &rid); + if (status) + goto err_unroll; + + buf[recps].recipe_indx = (u8)rid; + buf[recps].content.rid = (u8)rid; + buf[recps].content.rid |= ICE_AQ_RECIPE_ID_IS_ROOT; + /* the new entry created should also be part of rg_list to + * make sure we have complete recipe + */ + last_chain_entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*last_chain_entry), + GFP_KERNEL); + if (!last_chain_entry) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll; + } + last_chain_entry->rid = rid; + memset(&buf[recps].content.lkup_indx, 0, + sizeof(buf[recps].content.lkup_indx)); + /* All recipes use look-up index 0 to match switch ID. */ + buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX; + buf[recps].content.mask[0] = + cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK); + for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) { + buf[recps].content.lkup_indx[i] = + ICE_AQ_RECIPE_LKUP_IGNORE; + buf[recps].content.mask[i] = 0; + } + + i = 1; + /* update r_bitmap with the recp that is used for chaining */ + set_bit(rid, rm->r_bitmap); + /* this is the recipe that chains all the other recipes so it + * should not have a chaining ID to indicate the same + */ + last_chain_entry->chain_idx = ICE_INVAL_CHAIN_IND; + list_for_each_entry(entry, &rm->rg_list, l_entry) { + last_chain_entry->fv_idx[i] = entry->chain_idx; + buf[recps].content.lkup_indx[i] = entry->chain_idx; + buf[recps].content.mask[i++] = cpu_to_le16(0xFFFF); + set_bit(entry->rid, rm->r_bitmap); + } + list_add(&last_chain_entry->l_entry, &rm->rg_list); + if (sizeof(buf[recps].recipe_bitmap) >= + sizeof(rm->r_bitmap)) { + memcpy(buf[recps].recipe_bitmap, rm->r_bitmap, + sizeof(buf[recps].recipe_bitmap)); + } else { + status = ICE_ERR_BAD_PTR; + goto err_unroll; + } + buf[recps].content.act_ctrl_fwd_priority = rm->priority; + + /* To differentiate among different UDP tunnels, a meta data ID + * flag is used. + */ + if (match_tun_mask) { + buf[recps].content.lkup_indx[i] = ICE_TUN_FLAG_FV_IND; + buf[recps].content.mask[i] = + cpu_to_le16(match_tun_mask); + } + + recps++; + rm->root_rid = (u8)rid; + } + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + + status = ice_aq_add_recipe(hw, buf, rm->n_grp_count, NULL); + ice_release_change_lock(hw); + if (status) + goto err_unroll; + + /* Every recipe that just got created add it to the recipe + * book keeping list + */ + list_for_each_entry(entry, &rm->rg_list, l_entry) { + struct ice_switch_info *sw = hw->switch_info; + bool is_root, idx_found = false; + struct ice_sw_recipe *recp; + u16 idx, buf_idx = 0; + + /* find buffer index for copying some data */ + for (idx = 0; idx < rm->n_grp_count; idx++) + if (buf[idx].recipe_indx == entry->rid) { + buf_idx = idx; + idx_found = true; + } + + if (!idx_found) { + status = ICE_ERR_OUT_OF_RANGE; + goto err_unroll; + } + + recp = &sw->recp_list[entry->rid]; + is_root = (rm->root_rid == entry->rid); + recp->is_root = is_root; + + recp->root_rid = entry->rid; + recp->big_recp = (is_root && rm->n_grp_count > 1); + + memcpy(&recp->ext_words, entry->r_group.pairs, + entry->r_group.n_val_pairs * sizeof(struct ice_fv_word)); + + memcpy(recp->r_bitmap, buf[buf_idx].recipe_bitmap, + sizeof(recp->r_bitmap)); + + /* Copy non-result fv index values and masks to recipe. This + * call will also update the result recipe bitmask. + */ + ice_collect_result_idx(&buf[buf_idx], recp); + + /* for non-root recipes, also copy to the root, this allows + * easier matching of a complete chained recipe + */ + if (!is_root) + ice_collect_result_idx(&buf[buf_idx], + &sw->recp_list[rm->root_rid]); + + recp->n_ext_words = entry->r_group.n_val_pairs; + recp->chain_idx = entry->chain_idx; + recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority; + recp->n_grp_count = rm->n_grp_count; + recp->recp_created = true; + } + rm->root_buf = buf; + kfree(tmp); + return status; + +err_unroll: +err_mem: + kfree(tmp); + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_create_recipe_group - creates recipe group + * @hw: pointer to hardware structure + * @rm: recipe management list entry + * @lkup_exts: lookup elements + */ +static enum ice_status +ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm, + struct ice_prot_lkup_ext *lkup_exts) +{ + enum ice_status status; + u8 recp_count = 0; + + rm->n_grp_count = 0; + + /* Create recipes for words that are marked not done by packing them + * as best fit. + */ + status = ice_create_first_fit_recp_def(hw, lkup_exts, + &rm->rg_list, &recp_count); + if (!status) { + rm->n_grp_count += recp_count; + rm->n_ext_words = lkup_exts->n_val_words; + memcpy(&rm->ext_words, lkup_exts->fv_words, + sizeof(rm->ext_words)); + memcpy(rm->word_masks, lkup_exts->field_mask, + sizeof(rm->word_masks)); + } + + return status; +} + +/** + * ice_get_fv - get field vectors/extraction sequences for spec. lookup types + * @hw: pointer to hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @bm: bitmap of field vectors to consider + * @fv_list: pointer to a list that holds the returned field vectors + */ +static enum ice_status +ice_get_fv(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + unsigned long *bm, struct list_head *fv_list) +{ + enum ice_status status; + u8 *prot_ids; + u16 i; + + prot_ids = kcalloc(lkups_cnt, sizeof(*prot_ids), GFP_KERNEL); + if (!prot_ids) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < lkups_cnt; i++) + if (!ice_prot_type_to_id(lkups[i].type, &prot_ids[i])) { + status = ICE_ERR_CFG; + goto free_mem; + } + + /* Find field vectors that include all specified protocol types */ + status = ice_get_sw_fv_list(hw, prot_ids, lkups_cnt, bm, fv_list); + +free_mem: + kfree(prot_ids); + return status; +} + +/* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule + * @hw: pointer to hardware structure + * @rinfo: other information regarding the rule e.g. priority and action info + * @bm: pointer to memory for returning the bitmap of field vectors + */ +static void +ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, + unsigned long *bm) +{ + bitmap_zero(bm, ICE_MAX_NUM_PROFILES); + + ice_get_sw_fv_bitmap(hw, ICE_PROF_NON_TUN, bm); +} + +/** + * ice_add_adv_recipe - Add an advanced recipe that is not part of the default + * @hw: pointer to hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @rinfo: other information regarding the rule e.g. priority and action info + * @rid: return the recipe ID of the recipe created + */ +static enum ice_status __maybe_unused +ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, u16 *rid) +{ + DECLARE_BITMAP(fv_bitmap, ICE_MAX_NUM_PROFILES); + DECLARE_BITMAP(profiles, ICE_MAX_NUM_PROFILES); + struct ice_prot_lkup_ext *lkup_exts; + struct ice_recp_grp_entry *r_entry; + struct ice_sw_fv_list_entry *fvit; + struct ice_recp_grp_entry *r_tmp; + struct ice_sw_fv_list_entry *tmp; + enum ice_status status = 0; + struct ice_sw_recipe *rm; + u16 match_tun_mask = 0; + u8 i; + + if (!lkups_cnt) + return ICE_ERR_PARAM; + + lkup_exts = kzalloc(sizeof(*lkup_exts), GFP_KERNEL); + if (!lkup_exts) + return ICE_ERR_NO_MEMORY; + + /* Determine the number of words to be matched and if it exceeds a + * recipe's restrictions + */ + for (i = 0; i < lkups_cnt; i++) { + u16 count; + + if (lkups[i].type >= ICE_PROTOCOL_LAST) { + status = ICE_ERR_CFG; + goto err_free_lkup_exts; + } + + count = ice_fill_valid_words(&lkups[i], lkup_exts); + if (!count) { + status = ICE_ERR_CFG; + goto err_free_lkup_exts; + } + } + + rm = kzalloc(sizeof(*rm), GFP_KERNEL); + if (!rm) { + status = ICE_ERR_NO_MEMORY; + goto err_free_lkup_exts; + } + + /* Get field vectors that contain fields extracted from all the protocol + * headers being programmed. + */ + INIT_LIST_HEAD(&rm->fv_list); + INIT_LIST_HEAD(&rm->rg_list); + + /* Get bitmap of field vectors (profiles) that are compatible with the + * rule request; only these will be searched in the subsequent call to + * ice_get_fv. + */ + ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap); + + status = ice_get_fv(hw, lkups, lkups_cnt, fv_bitmap, &rm->fv_list); + if (status) + goto err_unroll; + + /* Group match words into recipes using preferred recipe grouping + * criteria. + */ + status = ice_create_recipe_group(hw, rm, lkup_exts); + if (status) + goto err_unroll; + + /* set the recipe priority if specified */ + rm->priority = (u8)rinfo->priority; + + /* Find offsets from the field vector. Pick the first one for all the + * recipes. + */ + status = ice_fill_fv_word_index(hw, &rm->fv_list, &rm->rg_list); + if (status) + goto err_unroll; + + /* get bitmap of all profiles the recipe will be associated with */ + bitmap_zero(profiles, ICE_MAX_NUM_PROFILES); + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + ice_debug(hw, ICE_DBG_SW, "profile: %d\n", fvit->profile_id); + set_bit((u16)fvit->profile_id, profiles); + } + + /* Look for a recipe which matches our requested fv / mask list */ + *rid = ice_find_recp(hw, lkup_exts); + if (*rid < ICE_MAX_NUM_RECIPES) + /* Success if found a recipe that match the existing criteria */ + goto err_unroll; + + /* Recipe we need does not exist, add a recipe */ + status = ice_add_sw_recipe(hw, rm, match_tun_mask, profiles); + if (status) + goto err_unroll; + + /* Associate all the recipes created with all the profiles in the + * common field vector. + */ + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u16 j; + + status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, + (u8 *)r_bitmap, NULL); + if (status) + goto err_unroll; + + bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, + ICE_MAX_NUM_RECIPES); + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + + status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, + (u8 *)r_bitmap, + NULL); + ice_release_change_lock(hw); + + if (status) + goto err_unroll; + + /* Update profile to recipe bitmap array */ + bitmap_copy(profile_to_recipe[fvit->profile_id], r_bitmap, + ICE_MAX_NUM_RECIPES); + + /* Update recipe to profile bitmap array */ + for_each_set_bit(j, rm->r_bitmap, ICE_MAX_NUM_RECIPES) + set_bit((u16)fvit->profile_id, recipe_to_profile[j]); + } + + *rid = rm->root_rid; + memcpy(&hw->switch_info->recp_list[*rid].lkup_exts, lkup_exts, + sizeof(*lkup_exts)); +err_unroll: + list_for_each_entry_safe(r_entry, r_tmp, &rm->rg_list, l_entry) { + list_del(&r_entry->l_entry); + devm_kfree(ice_hw_to_dev(hw), r_entry); + } + + list_for_each_entry_safe(fvit, tmp, &rm->fv_list, list_entry) { + list_del(&fvit->list_entry); + devm_kfree(ice_hw_to_dev(hw), fvit); + } + + if (rm->root_buf) + devm_kfree(ice_hw_to_dev(hw), rm->root_buf); + + kfree(rm); + +err_free_lkup_exts: + kfree(lkup_exts); + + return status; +} + /** * ice_replay_vsi_fltr - Replay filters for requested VSI * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index e6eeffb3dde9..6fd4537106ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -125,30 +125,110 @@ struct ice_fltr_info { u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; +struct ice_adv_lkup_elem { + enum ice_protocol_type type; + union ice_prot_hdr h_u; /* Header values */ + union ice_prot_hdr m_u; /* Mask of header values to match */ +}; + +struct ice_sw_act_ctrl { + /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ + u16 src; + u16 flag; + enum ice_sw_fwd_act_type fltr_act; + /* Depending on filter action */ + union { + /* This is a queue ID in case of ICE_FWD_TO_Q and starting + * queue ID in case of ICE_FWD_TO_QGRP. + */ + u16 q_id:11; + u16 vsi_id:10; + u16 hw_vsi_id:10; + u16 vsi_list_id:10; + } fwd_id; + /* software VSI handle */ + u16 vsi_handle; + u8 qgrp_size; +}; + +struct ice_rule_query_data { + /* Recipe ID for which the requested rule was added */ + u16 rid; + /* Rule ID that was added or is supposed to be removed */ + u16 rule_id; + /* vsi_handle for which Rule was added or is supposed to be removed */ + u16 vsi_handle; +}; + +struct ice_adv_rule_info { + struct ice_sw_act_ctrl sw_act; + u32 priority; + u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ + u16 fltr_rule_id; +}; + +/* A collection of one or more four word recipe */ struct ice_sw_recipe { - struct list_head l_entry; - - /* To protect modification of filt_rule list - * defined below + /* For a chained recipe the root recipe is what should be used for + * programming rules */ - struct mutex filt_rule_lock; + u8 is_root; + u8 root_rid; + u8 recp_created; - /* List of type ice_fltr_mgmt_list_entry */ + /* Number of extraction words */ + u8 n_ext_words; + /* Protocol ID and Offset pair (extraction word) to describe the + * recipe + */ + struct ice_fv_word ext_words[ICE_MAX_CHAIN_WORDS]; + u16 word_masks[ICE_MAX_CHAIN_WORDS]; + + /* if this recipe is a collection of other recipe */ + u8 big_recp; + + /* if this recipe is part of another bigger recipe then chain index + * corresponding to this recipe + */ + u8 chain_idx; + + /* if this recipe is a collection of other recipe then count of other + * recipes and recipe IDs of those recipes + */ + u8 n_grp_count; + + /* Bit map specifying the IDs associated with this group of recipe */ + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + + /* List of type ice_fltr_mgmt_list_entry or adv_rule */ + u8 adv_rule; struct list_head filt_rules; struct list_head filt_replay_rules; - /* linked list of type recipe_list_entry */ - struct list_head rg_list; - /* linked list of type ice_sw_fv_list_entry*/ - struct list_head fv_list; - struct ice_aqc_recipe_data_elem *r_buf; - u8 recp_count; - u8 root_rid; - u8 num_profs; - u8 *prof_ids; + struct mutex filt_rule_lock; /* protect filter rule structure */ - /* recipe bitmap: what all recipes makes this recipe */ - DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + /* Profiles this recipe should be associated with */ + struct list_head fv_list; + + /* Profiles this recipe is associated with */ + u8 num_profs, *prof_ids; + + /* Bit map for possible result indexes */ + DECLARE_BITMAP(res_idxs, ICE_MAX_FV_WORDS); + + /* This allows user to specify the recipe priority. + * For now, this becomes 'fwd_priority' when recipe + * is created, usually recipes can have 'fwd' and 'join' + * priority. + */ + u8 priority; + + struct list_head rg_list; + + /* AQ buffer associated with this recipe */ + struct ice_aqc_recipe_data_elem *root_buf; + /* This struct saves the fv_words for a given lookup */ + struct ice_prot_lkup_ext lkup_exts; }; /* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */ From 0f94570d0cae1ab7c322d764c3514d3558137eea Mon Sep 17 00:00:00 2001 From: Grishma Kotecha Date: Fri, 6 Aug 2021 10:49:01 +0200 Subject: [PATCH 085/440] ice: allow adding advanced rules Define dummy packet headers to allow adding advanced rules in HW. This header is used as admin queue command parameter for adding a rule. The firmware will extract correct fields and will use them in look ups. Define each supported packets header and offsets to words used in recipe. Supported headers: - MAC + IPv4 + UDP - MAC + VLAN + IPv4 + UDP - MAC + IPv4 + TCP - MAC + VLAN + IPv4 + TCP - MAC + IPv6 + UDP - MAC + VLAN + IPv6 + UDP - MAC + IPv6 + TCP - MAC + VLAN + IPv6 + TCP Add code for creating an advanced rule. Rule needs to match defined dummy packet, if not return error, which means that this type of rule isn't currently supported. The first step in adding advanced rule is searching for an advanced recipe matching this kind of rule. If it doesn't exist new recipe is created. Dummy packet has to be filled with the correct header field value from the rule definition. It will be used to do look up in HW. Support searching for existing advance rule entry. It is used in case of adding the same rule on different VSI. In this case, instead of creating new rule, the existing one should be updated with refreshed VSI list. Add initialization for prof_res_bm_init flag to zero so that the possible resource for fv in the files can be initialized. Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Signed-off-by: Grishma Kotecha Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 1 + drivers/net/ethernet/intel/ice/ice_switch.c | 832 +++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_switch.h | 14 + drivers/net/ethernet/intel/ice/ice_type.h | 1 + 4 files changed, 847 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2fb81e359cdf..bff4c09dfa5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -568,6 +568,7 @@ static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) return ICE_ERR_NO_MEMORY; INIT_LIST_HEAD(&sw->vsi_list_map_head); + sw->prof_res_bm_init = 0; status = ice_init_def_sw_recp(hw); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index eb7cd1a955f1..4042d91de06c 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -8,6 +8,7 @@ #define ICE_ETH_ETHTYPE_OFFSET 12 #define ICE_ETH_VLAN_TCI_OFFSET 14 #define ICE_MAX_VLAN_ID 0xFFF +#define ICE_IPV6_ETHER_ID 0x86DD /* Dummy ethernet header needed in the ice_aqc_sw_rules_elem * struct to configure any switch filter rules. @@ -29,6 +30,290 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, 0x2, 0, 0, 0, 0, 0, 0x81, 0, 0, 0}; +struct ice_dummy_pkt_offsets { + enum ice_protocol_type type; + u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */ +}; + +/* offset info for MAC + IPv4 + UDP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_ILOS, 34 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* Dummy packet for MAC + IPv4 + UDP */ +static const u8 dummy_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 34 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + VLAN + IPv4 + UDP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV4_OFOS, 18 }, + { ICE_UDP_ILOS, 38 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (801.1Q), IPv4:UDP dummy packet */ +static const u8 dummy_vlan_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x08, 0x00, /* ICE_ETYPE_OL 16 */ + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 18 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 38 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + IPv4 + TCP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_TCP_IL, 34 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* Dummy packet for MAC + IPv4 + TCP */ +static const u8 dummy_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 34 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + VLAN (C-tag, 802.1Q) + IPv4 + TCP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV4_OFOS, 18 }, + { ICE_TCP_IL, 38 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (801.1Q), IPv4:TCP dummy packet */ +static const u8 dummy_vlan_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x08, 0x00, /* ICE_ETYPE_OL 16 */ + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 18 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 38 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_TCP_IL, 54 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_tcp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xDD, /* ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */ + 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 54 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* C-tag (802.1Q): IPv6 + TCP */ +static const struct ice_dummy_pkt_offsets +dummy_vlan_tcp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV6_OFOS, 18 }, + { ICE_TCP_IL, 58 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (802.1Q), IPv6 + TCP dummy packet */ +static const u8 dummy_vlan_tcp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x86, 0xDD, /* ICE_ETYPE_OL 16 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 18 */ + 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 58 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* IPv6 + UDP */ +static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_UDP_ILOS, 54 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* IPv6 + UDP dummy packet */ +static const u8 dummy_udp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xDD, /* ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */ + 0x00, 0x10, 0x11, 0x00, /* Next header UDP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 54 */ + 0x00, 0x10, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* needed for ESP packets */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* C-tag (802.1Q): IPv6 + UDP */ +static const struct ice_dummy_pkt_offsets +dummy_vlan_udp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV6_OFOS, 18 }, + { ICE_UDP_ILOS, 58 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (802.1Q), IPv6 + UDP dummy packet */ +static const u8 dummy_vlan_udp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00,/* ICE_VLAN_OFOS 12 */ + + 0x86, 0xDD, /* ICE_ETYPE_OL 16 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 18 */ + 0x00, 0x08, 0x11, 0x00, /* Next header UDP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 58 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + #define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \ (DUMMY_ETH_HDR_LEN * \ @@ -3877,7 +4162,7 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, * @rinfo: other information regarding the rule e.g. priority and action info * @rid: return the recipe ID of the recipe created */ -static enum ice_status __maybe_unused +static enum ice_status ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, struct ice_adv_rule_info *rinfo, u16 *rid) { @@ -4035,6 +4320,551 @@ err_free_lkup_exts: return status; } +/** + * ice_find_dummy_packet - find dummy packet + * + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @pkt: dummy packet to fill according to filter match criteria + * @pkt_len: packet length of dummy packet + * @offsets: pointer to receive the pointer to the offsets for the packet + */ +static void +ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + const u8 **pkt, u16 *pkt_len, + const struct ice_dummy_pkt_offsets **offsets) +{ + bool tcp = false, udp = false, ipv6 = false, vlan = false; + u16 i; + + for (i = 0; i < lkups_cnt; i++) { + if (lkups[i].type == ICE_UDP_ILOS) + udp = true; + else if (lkups[i].type == ICE_TCP_IL) + tcp = true; + else if (lkups[i].type == ICE_IPV6_OFOS) + ipv6 = true; + else if (lkups[i].type == ICE_VLAN_OFOS) + vlan = true; + else if (lkups[i].type == ICE_ETYPE_OL && + lkups[i].h_u.ethertype.ethtype_id == + cpu_to_be16(ICE_IPV6_ETHER_ID) && + lkups[i].m_u.ethertype.ethtype_id == + cpu_to_be16(0xFFFF)) + ipv6 = true; + } + + if (udp && !ipv6) { + if (vlan) { + *pkt = dummy_vlan_udp_packet; + *pkt_len = sizeof(dummy_vlan_udp_packet); + *offsets = dummy_vlan_udp_packet_offsets; + return; + } + *pkt = dummy_udp_packet; + *pkt_len = sizeof(dummy_udp_packet); + *offsets = dummy_udp_packet_offsets; + return; + } else if (udp && ipv6) { + if (vlan) { + *pkt = dummy_vlan_udp_ipv6_packet; + *pkt_len = sizeof(dummy_vlan_udp_ipv6_packet); + *offsets = dummy_vlan_udp_ipv6_packet_offsets; + return; + } + *pkt = dummy_udp_ipv6_packet; + *pkt_len = sizeof(dummy_udp_ipv6_packet); + *offsets = dummy_udp_ipv6_packet_offsets; + return; + } else if ((tcp && ipv6) || ipv6) { + if (vlan) { + *pkt = dummy_vlan_tcp_ipv6_packet; + *pkt_len = sizeof(dummy_vlan_tcp_ipv6_packet); + *offsets = dummy_vlan_tcp_ipv6_packet_offsets; + return; + } + *pkt = dummy_tcp_ipv6_packet; + *pkt_len = sizeof(dummy_tcp_ipv6_packet); + *offsets = dummy_tcp_ipv6_packet_offsets; + return; + } + + if (vlan) { + *pkt = dummy_vlan_tcp_packet; + *pkt_len = sizeof(dummy_vlan_tcp_packet); + *offsets = dummy_vlan_tcp_packet_offsets; + } else { + *pkt = dummy_tcp_packet; + *pkt_len = sizeof(dummy_tcp_packet); + *offsets = dummy_tcp_packet_offsets; + } +} + +/** + * ice_fill_adv_dummy_packet - fill a dummy packet with given match criteria + * + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @s_rule: stores rule information from the match criteria + * @dummy_pkt: dummy packet to fill according to filter match criteria + * @pkt_len: packet length of dummy packet + * @offsets: offset info for the dummy packet + */ +static enum ice_status +ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + struct ice_aqc_sw_rules_elem *s_rule, + const u8 *dummy_pkt, u16 pkt_len, + const struct ice_dummy_pkt_offsets *offsets) +{ + u8 *pkt; + u16 i; + + /* Start with a packet with a pre-defined/dummy content. Then, fill + * in the header values to be looked up or matched. + */ + pkt = s_rule->pdata.lkup_tx_rx.hdr; + + memcpy(pkt, dummy_pkt, pkt_len); + + for (i = 0; i < lkups_cnt; i++) { + enum ice_protocol_type type; + u16 offset = 0, len = 0, j; + bool found = false; + + /* find the start of this layer; it should be found since this + * was already checked when search for the dummy packet + */ + type = lkups[i].type; + for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) { + if (type == offsets[j].type) { + offset = offsets[j].offset; + found = true; + break; + } + } + /* this should never happen in a correct calling sequence */ + if (!found) + return ICE_ERR_PARAM; + + switch (lkups[i].type) { + case ICE_MAC_OFOS: + case ICE_MAC_IL: + len = sizeof(struct ice_ether_hdr); + break; + case ICE_ETYPE_OL: + len = sizeof(struct ice_ethtype_hdr); + break; + case ICE_VLAN_OFOS: + len = sizeof(struct ice_vlan_hdr); + break; + case ICE_IPV4_OFOS: + case ICE_IPV4_IL: + len = sizeof(struct ice_ipv4_hdr); + break; + case ICE_IPV6_OFOS: + case ICE_IPV6_IL: + len = sizeof(struct ice_ipv6_hdr); + break; + case ICE_TCP_IL: + case ICE_UDP_OF: + case ICE_UDP_ILOS: + len = sizeof(struct ice_l4_hdr); + break; + case ICE_SCTP_IL: + len = sizeof(struct ice_sctp_hdr); + break; + default: + return ICE_ERR_PARAM; + } + + /* the length should be a word multiple */ + if (len % ICE_BYTES_PER_WORD) + return ICE_ERR_CFG; + + /* We have the offset to the header start, the length, the + * caller's header values and mask. Use this information to + * copy the data into the dummy packet appropriately based on + * the mask. Note that we need to only write the bits as + * indicated by the mask to make sure we don't improperly write + * over any significant packet data. + */ + for (j = 0; j < len / sizeof(u16); j++) + if (((u16 *)&lkups[i].m_u)[j]) + ((u16 *)(pkt + offset))[j] = + (((u16 *)(pkt + offset))[j] & + ~((u16 *)&lkups[i].m_u)[j]) | + (((u16 *)&lkups[i].h_u)[j] & + ((u16 *)&lkups[i].m_u)[j]); + } + + s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(pkt_len); + + return 0; +} + +/** + * ice_find_adv_rule_entry - Search a rule entry + * @hw: pointer to the hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @recp_id: recipe ID for which we are finding the rule + * @rinfo: other information regarding the rule e.g. priority and action info + * + * Helper function to search for a given advance rule entry + * Returns pointer to entry storing the rule if found + */ +static struct ice_adv_fltr_mgmt_list_entry * +ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, u16 recp_id, + struct ice_adv_rule_info *rinfo) +{ + struct ice_adv_fltr_mgmt_list_entry *list_itr; + struct ice_switch_info *sw = hw->switch_info; + int i; + + list_for_each_entry(list_itr, &sw->recp_list[recp_id].filt_rules, + list_entry) { + bool lkups_matched = true; + + if (lkups_cnt != list_itr->lkups_cnt) + continue; + for (i = 0; i < list_itr->lkups_cnt; i++) + if (memcmp(&list_itr->lkups[i], &lkups[i], + sizeof(*lkups))) { + lkups_matched = false; + break; + } + if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag && + lkups_matched) + return list_itr; + } + return NULL; +} + +/** + * ice_adv_add_update_vsi_list + * @hw: pointer to the hardware structure + * @m_entry: pointer to current adv filter management list entry + * @cur_fltr: filter information from the book keeping entry + * @new_fltr: filter information with the new VSI to be added + * + * Call AQ command to add or update previously created VSI list with new VSI. + * + * Helper function to do book keeping associated with adding filter information + * The algorithm to do the booking keeping is described below : + * When a VSI needs to subscribe to a given advanced filter + * if only one VSI has been added till now + * Allocate a new VSI list and add two VSIs + * to this list using switch rule command + * Update the previously created switch rule with the + * newly created VSI list ID + * if a VSI list was previously created + * Add the new VSI to the previously created VSI list set + * using the update switch rule command + */ +static enum ice_status +ice_adv_add_update_vsi_list(struct ice_hw *hw, + struct ice_adv_fltr_mgmt_list_entry *m_entry, + struct ice_adv_rule_info *cur_fltr, + struct ice_adv_rule_info *new_fltr) +{ + enum ice_status status; + u16 vsi_list_id = 0; + + if (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_Q || + cur_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP || + cur_fltr->sw_act.fltr_act == ICE_DROP_PACKET) + return ICE_ERR_NOT_IMPL; + + if ((new_fltr->sw_act.fltr_act == ICE_FWD_TO_Q || + new_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP) && + (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI || + cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI_LIST)) + return ICE_ERR_NOT_IMPL; + + if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) { + /* Only one entry existed in the mapping and it was not already + * a part of a VSI list. So, create a VSI list with the old and + * new VSIs. + */ + struct ice_fltr_info tmp_fltr; + u16 vsi_handle_arr[2]; + + /* A rule already exists with the new VSI being added */ + if (cur_fltr->sw_act.fwd_id.hw_vsi_id == + new_fltr->sw_act.fwd_id.hw_vsi_id) + return ICE_ERR_ALREADY_EXISTS; + + vsi_handle_arr[0] = cur_fltr->sw_act.vsi_handle; + vsi_handle_arr[1] = new_fltr->sw_act.vsi_handle; + status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2, + &vsi_list_id, + ICE_SW_LKUP_LAST); + if (status) + return status; + + memset(&tmp_fltr, 0, sizeof(tmp_fltr)); + tmp_fltr.flag = m_entry->rule_info.sw_act.flag; + tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; + tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; + tmp_fltr.lkup_type = ICE_SW_LKUP_LAST; + + /* Update the previous switch rule of "forward to VSI" to + * "fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); + if (status) + return status; + + cur_fltr->sw_act.fwd_id.vsi_list_id = vsi_list_id; + cur_fltr->sw_act.fltr_act = ICE_FWD_TO_VSI_LIST; + m_entry->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, + vsi_list_id); + } else { + u16 vsi_handle = new_fltr->sw_act.vsi_handle; + + if (!m_entry->vsi_list_info) + return ICE_ERR_CFG; + + /* A rule already exists with the new VSI being added */ + if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map)) + return 0; + + /* Update the previously created VSI list set with + * the new VSI ID passed in + */ + vsi_list_id = cur_fltr->sw_act.fwd_id.vsi_list_id; + + status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, + vsi_list_id, false, + ice_aqc_opc_update_sw_rules, + ICE_SW_LKUP_LAST); + /* update VSI list mapping info with new VSI ID */ + if (!status) + set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map); + } + if (!status) + m_entry->vsi_count++; + return status; +} + +/** + * ice_add_adv_rule - helper function to create an advanced switch rule + * @hw: pointer to the hardware structure + * @lkups: information on the words that needs to be looked up. All words + * together makes one recipe + * @lkups_cnt: num of entries in the lkups array + * @rinfo: other information related to the rule that needs to be programmed + * @added_entry: this will return recipe_id, rule_id and vsi_handle. should be + * ignored is case of error. + * + * This function can program only 1 rule at a time. The lkups is used to + * describe the all the words that forms the "lookup" portion of the recipe. + * These words can span multiple protocols. Callers to this function need to + * pass in a list of protocol headers with lookup information along and mask + * that determines which words are valid from the given protocol header. + * rinfo describes other information related to this rule such as forwarding + * IDs, priority of this rule, etc. + */ +enum ice_status +ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, + struct ice_rule_query_data *added_entry) +{ + struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL; + u16 rid = 0, i, pkt_len, rule_buf_sz, vsi_handle; + const struct ice_dummy_pkt_offsets *pkt_offsets; + struct ice_aqc_sw_rules_elem *s_rule = NULL; + struct list_head *rule_head; + struct ice_switch_info *sw; + enum ice_status status; + const u8 *pkt = NULL; + u16 word_cnt; + u32 act = 0; + u8 q_rgn; + + /* Initialize profile to result index bitmap */ + if (!hw->switch_info->prof_res_bm_init) { + hw->switch_info->prof_res_bm_init = 1; + ice_init_prof_result_bm(hw); + } + + if (!lkups_cnt) + return ICE_ERR_PARAM; + + /* get # of words we need to match */ + word_cnt = 0; + for (i = 0; i < lkups_cnt; i++) { + u16 j, *ptr; + + ptr = (u16 *)&lkups[i].m_u; + for (j = 0; j < sizeof(lkups->m_u) / sizeof(u16); j++) + if (ptr[j] != 0) + word_cnt++; + } + + if (!word_cnt || word_cnt > ICE_MAX_CHAIN_WORDS) + return ICE_ERR_PARAM; + + /* make sure that we can locate a dummy packet */ + ice_find_dummy_packet(lkups, lkups_cnt, &pkt, &pkt_len, + &pkt_offsets); + if (!pkt) { + status = ICE_ERR_PARAM; + goto err_ice_add_adv_rule; + } + + if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI || + rinfo->sw_act.fltr_act == ICE_FWD_TO_Q || + rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP || + rinfo->sw_act.fltr_act == ICE_DROP_PACKET)) + return ICE_ERR_CFG; + + vsi_handle = rinfo->sw_act.vsi_handle; + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) + rinfo->sw_act.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, vsi_handle); + if (rinfo->sw_act.flag & ICE_FLTR_TX) + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); + + status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid); + if (status) + return status; + m_entry = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo); + if (m_entry) { + /* we have to add VSI to VSI_LIST and increment vsi_count. + * Also Update VSI list so that we can change forwarding rule + * if the rule already exists, we will check if it exists with + * same vsi_id, if not then add it to the VSI list if it already + * exists if not then create a VSI list and add the existing VSI + * ID and the new VSI ID to the list + * We will add that VSI to the list + */ + status = ice_adv_add_update_vsi_list(hw, m_entry, + &m_entry->rule_info, + rinfo); + if (added_entry) { + added_entry->rid = rid; + added_entry->rule_id = m_entry->rule_info.fltr_rule_id; + added_entry->vsi_handle = rinfo->sw_act.vsi_handle; + } + return status; + } + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + pkt_len; + s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE; + switch (rinfo->sw_act.fltr_act) { + case ICE_FWD_TO_VSI: + act |= (rinfo->sw_act.fwd_id.hw_vsi_id << + ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M; + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_Q: + act |= ICE_SINGLE_ACT_TO_Q; + act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + break; + case ICE_FWD_TO_QGRP: + q_rgn = rinfo->sw_act.qgrp_size > 0 ? + (u8)ilog2(rinfo->sw_act.qgrp_size) : 0; + act |= ICE_SINGLE_ACT_TO_Q; + act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) & + ICE_SINGLE_ACT_Q_REGION_M; + break; + case ICE_DROP_PACKET: + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | + ICE_SINGLE_ACT_VALID_BIT; + break; + default: + status = ICE_ERR_CFG; + goto err_ice_add_adv_rule; + } + + /* set the rule LOOKUP type based on caller specified 'Rx' + * instead of hardcoding it to be either LOOKUP_TX/RX + * + * for 'Rx' set the source to be the port number + * for 'Tx' set the source to be the source HW VSI number (determined + * by caller) + */ + if (rinfo->rx) { + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->pdata.lkup_tx_rx.src = + cpu_to_le16(hw->port_info->lport); + } else { + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(rinfo->sw_act.src); + } + + s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(rid); + s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + + status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, pkt, + pkt_len, pkt_offsets); + if (status) + goto err_ice_add_adv_rule; + + status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, + rule_buf_sz, 1, ice_aqc_opc_add_sw_rules, + NULL); + if (status) + goto err_ice_add_adv_rule; + adv_fltr = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(struct ice_adv_fltr_mgmt_list_entry), + GFP_KERNEL); + if (!adv_fltr) { + status = ICE_ERR_NO_MEMORY; + goto err_ice_add_adv_rule; + } + + adv_fltr->lkups = devm_kmemdup(ice_hw_to_dev(hw), lkups, + lkups_cnt * sizeof(*lkups), GFP_KERNEL); + if (!adv_fltr->lkups) { + status = ICE_ERR_NO_MEMORY; + goto err_ice_add_adv_rule; + } + + adv_fltr->lkups_cnt = lkups_cnt; + adv_fltr->rule_info = *rinfo; + adv_fltr->rule_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + sw = hw->switch_info; + sw->recp_list[rid].adv_rule = true; + rule_head = &sw->recp_list[rid].filt_rules; + + if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) + adv_fltr->vsi_count = 1; + + /* Add rule entry to book keeping list */ + list_add(&adv_fltr->list_entry, rule_head); + if (added_entry) { + added_entry->rid = rid; + added_entry->rule_id = adv_fltr->rule_info.fltr_rule_id; + added_entry->vsi_handle = rinfo->sw_act.vsi_handle; + } +err_ice_add_adv_rule: + if (status && adv_fltr) { + devm_kfree(ice_hw_to_dev(hw), adv_fltr->lkups); + devm_kfree(ice_hw_to_dev(hw), adv_fltr); + } + + kfree(s_rule); + + return status; +} + /** * ice_replay_vsi_fltr - Replay filters for requested VSI * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 6fd4537106ed..5175371f7ae9 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -266,6 +266,16 @@ struct ice_fltr_mgmt_list_entry { u8 counter_index; }; +struct ice_adv_fltr_mgmt_list_entry { + struct list_head list_entry; + + struct ice_adv_lkup_elem *lkups; + struct ice_adv_rule_info rule_info; + u16 lkups_cnt; + struct ice_vsi_list_map_info *vsi_list_info; + u16 vsi_count; +}; + enum ice_promisc_flags { ICE_PROMISC_UCAST_RX = 0x1, ICE_PROMISC_UCAST_TX = 0x2, @@ -301,6 +311,10 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id); /* Switch/bridge related commands */ +enum ice_status +ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, + struct ice_rule_query_data *added_entry); enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw); enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index b2786783b80f..d22ac1d430d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -677,6 +677,7 @@ struct ice_port_info { struct ice_switch_info { struct list_head vsi_list_map_head; struct ice_sw_recipe *recp_list; + u16 prof_res_bm_init; u16 max_used_prof_index; DECLARE_BITMAP(prof_res_bm[ICE_MAX_NUM_PROFILES], ICE_MAX_FV_WORDS); From 8bb98f33dead401daa5941abb357983362e0f6ca Mon Sep 17 00:00:00 2001 From: Shivanshu Shukla Date: Fri, 6 Aug 2021 10:49:02 +0200 Subject: [PATCH 086/440] ice: allow deleting advanced rules To remove advanced rule the same protocols list like in adding should be send to function. Based on this information list of advanced rules is searched to find the correct rule id. Remove advanced rule if it forwards to only one VSI. If it forwards to list of VSI remove only input VSI from this list. Introduce function to remove rule by id. It is used in case rule needs to be removed even if it forwards to the list of VSI. Allow removing all advanced rules from a particular VSI. It is useful in rebuilding VSI path. Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Signed-off-by: Shivanshu Shukla Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 223 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 6 + 2 files changed, 229 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 4042d91de06c..81dee4ba2dc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4922,6 +4922,229 @@ end: return status; } +/** + * ice_adv_rem_update_vsi_list + * @hw: pointer to the hardware structure + * @vsi_handle: VSI handle of the VSI to remove + * @fm_list: filter management entry for which the VSI list management needs to + * be done + */ +static enum ice_status +ice_adv_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle, + struct ice_adv_fltr_mgmt_list_entry *fm_list) +{ + struct ice_vsi_list_map_info *vsi_list_info; + enum ice_sw_lkup_type lkup_type; + enum ice_status status; + u16 vsi_list_id; + + if (fm_list->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST || + fm_list->vsi_count == 0) + return ICE_ERR_PARAM; + + /* A rule with the VSI being removed does not exist */ + if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map)) + return ICE_ERR_DOES_NOT_EXIST; + + lkup_type = ICE_SW_LKUP_LAST; + vsi_list_id = fm_list->rule_info.sw_act.fwd_id.vsi_list_id; + status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + + fm_list->vsi_count--; + clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map); + vsi_list_info = fm_list->vsi_list_info; + if (fm_list->vsi_count == 1) { + struct ice_fltr_info tmp_fltr; + u16 rem_vsi_handle; + + rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map, + ICE_MAX_VSI); + if (!ice_is_vsi_valid(hw, rem_vsi_handle)) + return ICE_ERR_OUT_OF_RANGE; + + /* Make sure VSI list is empty before removing it below */ + status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1, + vsi_list_id, true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + + memset(&tmp_fltr, 0, sizeof(tmp_fltr)); + tmp_fltr.flag = fm_list->rule_info.sw_act.flag; + tmp_fltr.fltr_rule_id = fm_list->rule_info.fltr_rule_id; + fm_list->rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI; + tmp_fltr.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, rem_vsi_handle); + fm_list->rule_info.sw_act.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, rem_vsi_handle); + fm_list->rule_info.sw_act.vsi_handle = rem_vsi_handle; + + /* Update the previous switch rule of "MAC forward to VSI" to + * "MAC fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); + if (status) { + ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n", + tmp_fltr.fwd_id.hw_vsi_id, status); + return status; + } + fm_list->vsi_list_info->ref_cnt--; + + /* Remove the VSI list since it is no longer used */ + status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type); + if (status) { + ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n", + vsi_list_id, status); + return status; + } + + list_del(&vsi_list_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_list_info); + fm_list->vsi_list_info = NULL; + } + + return status; +} + +/** + * ice_rem_adv_rule - removes existing advanced switch rule + * @hw: pointer to the hardware structure + * @lkups: information on the words that needs to be looked up. All words + * together makes one recipe + * @lkups_cnt: num of entries in the lkups array + * @rinfo: Its the pointer to the rule information for the rule + * + * This function can be used to remove 1 rule at a time. The lkups is + * used to describe all the words that forms the "lookup" portion of the + * rule. These words can span multiple protocols. Callers to this function + * need to pass in a list of protocol headers with lookup information along + * and mask that determines which words are valid from the given protocol + * header. rinfo describes other information related to this rule such as + * forwarding IDs, priority of this rule, etc. + */ +static enum ice_status +ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo) +{ + struct ice_adv_fltr_mgmt_list_entry *list_elem; + struct ice_prot_lkup_ext lkup_exts; + enum ice_status status = 0; + bool remove_rule = false; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 i, rid, vsi_handle; + + memset(&lkup_exts, 0, sizeof(lkup_exts)); + for (i = 0; i < lkups_cnt; i++) { + u16 count; + + if (lkups[i].type >= ICE_PROTOCOL_LAST) + return ICE_ERR_CFG; + + count = ice_fill_valid_words(&lkups[i], &lkup_exts); + if (!count) + return ICE_ERR_CFG; + } + + rid = ice_find_recp(hw, &lkup_exts); + /* If did not find a recipe that match the existing criteria */ + if (rid == ICE_MAX_NUM_RECIPES) + return ICE_ERR_PARAM; + + rule_lock = &hw->switch_info->recp_list[rid].filt_rule_lock; + list_elem = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo); + /* the rule is already removed */ + if (!list_elem) + return 0; + mutex_lock(rule_lock); + if (list_elem->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST) { + remove_rule = true; + } else if (list_elem->vsi_count > 1) { + remove_rule = false; + vsi_handle = rinfo->sw_act.vsi_handle; + status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem); + } else { + vsi_handle = rinfo->sw_act.vsi_handle; + status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem); + if (status) { + mutex_unlock(rule_lock); + return status; + } + if (list_elem->vsi_count == 0) + remove_rule = true; + } + mutex_unlock(rule_lock); + if (remove_rule) { + struct ice_aqc_sw_rules_elem *s_rule; + u16 rule_buf_sz; + + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + s_rule->pdata.lkup_tx_rx.act = 0; + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(list_elem->rule_info.fltr_rule_id); + s_rule->pdata.lkup_tx_rx.hdr_len = 0; + status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, + rule_buf_sz, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (!status || status == ICE_ERR_DOES_NOT_EXIST) { + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(rule_lock); + list_del(&list_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), list_elem->lkups); + devm_kfree(ice_hw_to_dev(hw), list_elem); + mutex_unlock(rule_lock); + if (list_empty(&sw->recp_list[rid].filt_rules)) + sw->recp_list[rid].adv_rule = false; + } + kfree(s_rule); + } + return status; +} + +/** + * ice_rem_adv_rule_by_id - removes existing advanced switch rule by ID + * @hw: pointer to the hardware structure + * @remove_entry: data struct which holds rule_id, VSI handle and recipe ID + * + * This function is used to remove 1 rule at a time. The removal is based on + * the remove_entry parameter. This function will remove rule for a given + * vsi_handle with a given rule_id which is passed as parameter in remove_entry + */ +enum ice_status +ice_rem_adv_rule_by_id(struct ice_hw *hw, + struct ice_rule_query_data *remove_entry) +{ + struct ice_adv_fltr_mgmt_list_entry *list_itr; + struct list_head *list_head; + struct ice_adv_rule_info rinfo; + struct ice_switch_info *sw; + + sw = hw->switch_info; + if (!sw->recp_list[remove_entry->rid].recp_created) + return ICE_ERR_PARAM; + list_head = &sw->recp_list[remove_entry->rid].filt_rules; + list_for_each_entry(list_itr, list_head, list_entry) { + if (list_itr->rule_info.fltr_rule_id == + remove_entry->rule_id) { + rinfo = list_itr->rule_info; + rinfo.sw_act.vsi_handle = remove_entry->vsi_handle; + return ice_rem_adv_rule(hw, list_itr->lkups, + list_itr->lkups_cnt, &rinfo); + } + } + /* either list is empty or unable to find rule */ + return ICE_ERR_DOES_NOT_EXIST; +} + /** * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 5175371f7ae9..34b7f74b1ab8 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -342,6 +342,12 @@ enum ice_status ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, bool rm_vlan_promisc); +enum ice_status +ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_rem_adv_rule_by_id(struct ice_hw *hw, + struct ice_rule_query_data *remove_entry); + enum ice_status ice_init_def_sw_recp(struct ice_hw *hw); u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle); From 8b8ef05b776e660a95d8347716b9b6c38ddf97c8 Mon Sep 17 00:00:00 2001 From: Victor Raj Date: Fri, 6 Aug 2021 10:49:03 +0200 Subject: [PATCH 087/440] ice: cleanup rules info Change ICE_SW_LKUP_LAST to ICE_MAX_NUM_RECIPES as for now there also can be recipes other than the default. Free all structures created for advanced recipes in cleanup function. Write a function to clean allocated structures on advanced rule info. Signed-off-by: Victor Raj Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 41 +++++++++++++++++---- drivers/net/ethernet/intel/ice/ice_switch.c | 28 +++++++++++++- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index bff4c09dfa5f..152a1405e353 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -595,17 +595,42 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) list_del(&v_pos_map->list_entry); devm_kfree(ice_hw_to_dev(hw), v_pos_map); } - recps = hw->switch_info->recp_list; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { - struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry; + recps = sw->recp_list; + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { + struct ice_recp_grp_entry *rg_entry, *tmprg_entry; recps[i].root_rid = i; - mutex_destroy(&recps[i].filt_rule_lock); - list_for_each_entry_safe(lst_itr, tmp_entry, - &recps[i].filt_rules, list_entry) { - list_del(&lst_itr->list_entry); - devm_kfree(ice_hw_to_dev(hw), lst_itr); + list_for_each_entry_safe(rg_entry, tmprg_entry, + &recps[i].rg_list, l_entry) { + list_del(&rg_entry->l_entry); + devm_kfree(ice_hw_to_dev(hw), rg_entry); } + + if (recps[i].adv_rule) { + struct ice_adv_fltr_mgmt_list_entry *tmp_entry; + struct ice_adv_fltr_mgmt_list_entry *lst_itr; + + mutex_destroy(&recps[i].filt_rule_lock); + list_for_each_entry_safe(lst_itr, tmp_entry, + &recps[i].filt_rules, + list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } + } else { + struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry; + + mutex_destroy(&recps[i].filt_rule_lock); + list_for_each_entry_safe(lst_itr, tmp_entry, + &recps[i].filt_rules, + list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } + } + if (recps[i].root_buf) + devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); } ice_rm_all_sw_replay_rule_info(hw); devm_kfree(ice_hw_to_dev(hw), sw->recp_list); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 81dee4ba2dc3..0d07547b40f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2682,6 +2682,27 @@ ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head) } } +/** + * ice_rem_adv_rule_info + * @hw: pointer to the hardware structure + * @rule_head: pointer to the switch list structure that we want to delete + */ +static void +ice_rem_adv_rule_info(struct ice_hw *hw, struct list_head *rule_head) +{ + struct ice_adv_fltr_mgmt_list_entry *tmp_entry; + struct ice_adv_fltr_mgmt_list_entry *lst_itr; + + if (list_empty(rule_head)) + return; + + list_for_each_entry_safe(lst_itr, tmp_entry, rule_head, list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } +} + /** * ice_cfg_dflt_vsi - change state of VSI to set/clear default * @hw: pointer to the hardware structure @@ -5183,12 +5204,15 @@ void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw) if (!sw) return; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { if (!list_empty(&sw->recp_list[i].filt_replay_rules)) { struct list_head *l_head; l_head = &sw->recp_list[i].filt_replay_rules; - ice_rem_sw_rule_info(hw, l_head); + if (!sw->recp_list[i].adv_rule) + ice_rem_sw_rule_info(hw, l_head); + else + ice_rem_adv_rule_info(hw, l_head); } } } From 572b820dfa61fc23b5ba59fd5baa92cd1ec2bacb Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 6 Aug 2021 10:49:04 +0200 Subject: [PATCH 088/440] ice: Allow changing lan_en and lb_en on all kinds of filters There is no way to change default lan_en and lb_en flags while adding new rule. Add function that allows changing these flags on rule determined by rule id and recipe id. Function checks if the rule is presented on regular rules list or advance rules list and call the appropriate function to update rule entry. As rules with ICE_SW_LKUP_DFLT recipe aren't tracked in a list, implement function which updates flags without searching for rules based only on rule id. Signed-off-by: Michal Swiatkowski Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fltr.c | 127 ++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index c2e78eaf4ccb..f8c59df4ff9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -453,6 +453,133 @@ static u32 ice_fltr_build_action(u16 vsi_id) ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; } +/** + * ice_fltr_find_adv_entry - find advanced rule + * @rules: list of rules + * @rule_id: id of wanted rule + */ +static struct ice_adv_fltr_mgmt_list_entry * +ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id) +{ + struct ice_adv_fltr_mgmt_list_entry *entry; + + list_for_each_entry(entry, rules, list_entry) { + if (entry->rule_info.fltr_rule_id == rule_id) + return entry; + } + + return NULL; +} + +/** + * ice_fltr_update_adv_rule_flags - update flags on advanced rule + * @vsi: pointer to VSI + * @recipe_id: id of recipe + * @entry: advanced rule entry + * @new_flags: flags to update + */ +static enum ice_status +ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id, + struct ice_adv_fltr_mgmt_list_entry *entry, + u32 new_flags) +{ + struct ice_adv_rule_info *info = &entry->rule_info; + struct ice_sw_act_ctrl *act = &info->sw_act; + u32 action; + + if (act->fltr_act != ICE_FWD_TO_VSI) + return ICE_ERR_NOT_SUPPORTED; + + action = ice_fltr_build_action(act->fwd_id.hw_vsi_id); + + return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, + recipe_id, action, info->sw_act.flag, + act->src, new_flags); +} + +/** + * ice_fltr_find_regular_entry - find regular rule + * @rules: list of rules + * @rule_id: id of wanted rule + */ +static struct ice_fltr_mgmt_list_entry * +ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id) +{ + struct ice_fltr_mgmt_list_entry *entry; + + list_for_each_entry(entry, rules, list_entry) { + if (entry->fltr_info.fltr_rule_id == rule_id) + return entry; + } + + return NULL; +} + +/** + * ice_fltr_update_regular_rule - update flags on regular rule + * @vsi: pointer to VSI + * @recipe_id: id of recipe + * @entry: regular rule entry + * @new_flags: flags to update + */ +static enum ice_status +ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id, + struct ice_fltr_mgmt_list_entry *entry, + u32 new_flags) +{ + struct ice_fltr_info *info = &entry->fltr_info; + u32 action; + + if (info->fltr_act != ICE_FWD_TO_VSI) + return ICE_ERR_NOT_SUPPORTED; + + action = ice_fltr_build_action(info->fwd_id.hw_vsi_id); + + return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, + recipe_id, action, info->flag, + info->src, new_flags); +} + +/** + * ice_fltr_update_flags - update flags on rule + * @vsi: pointer to VSI + * @rule_id: id of rule + * @recipe_id: id of recipe + * @new_flags: flags to update + * + * Function updates flags on regular and advance rule. + * + * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and + * ICE_SINGLE_ACT_LAN_ENABLE. + */ +enum ice_status +ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, + u32 new_flags) +{ + struct ice_adv_fltr_mgmt_list_entry *adv_entry; + struct ice_fltr_mgmt_list_entry *regular_entry; + struct ice_hw *hw = &vsi->back->hw; + struct ice_sw_recipe *recp_list; + struct list_head *fltr_rules; + + recp_list = &hw->switch_info->recp_list[recipe_id]; + if (!recp_list) + return ICE_ERR_DOES_NOT_EXIST; + + fltr_rules = &recp_list->filt_rules; + regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id); + if (regular_entry) + return ice_fltr_update_regular_rule(vsi, recipe_id, + regular_entry, new_flags); + + adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id); + if (adv_entry) + return ice_fltr_update_adv_rule_flags(vsi, recipe_id, + adv_entry, new_flags); + + return ICE_ERR_DOES_NOT_EXIST; +} + /** * ice_fltr_update_flags_dflt_rule - update flags on default rule * @vsi: pointer to VSI From 0d08a441fb1a5c4fdfa3f5fe2f2eb2f620f5b20d Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 6 Aug 2021 10:49:05 +0200 Subject: [PATCH 089/440] ice: ndo_setup_tc implementation for PF Implement ndo_setup_tc net device callback for TC HW offload on PF device. ndo_setup_tc provides support for HW offloading various TC filters. Add support for configuring the following filter with tc-flower: - default L2 filters (src/dst mac addresses, ethertype, VLAN) - variations of L3, L3+L4, L2+L3+L4 filters using advanced filters (including ipv4 and ipv6 addresses). Allow for adding/removing TC flows when PF device is configured in eswitch switchdev mode. Two types of actions are supported at the moment: FLOW_ACTION_DROP and FLOW_ACTION_REDIRECT. Co-developed-by: Priyalee Kushwaha Signed-off-by: Priyalee Kushwaha Signed-off-by: Kiran Patil Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/ice.h | 4 + drivers/net/ethernet/intel/ice/ice_main.c | 71 ++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 838 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_tc_lib.h | 129 +++ 5 files changed, 1044 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_tc_lib.c create mode 100644 drivers/net/ethernet/intel/ice/ice_tc_lib.h diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 1866be50095d..c36faa7d1471 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -27,7 +27,8 @@ ice-y := ice_main.o \ ice_fw_update.o \ ice_lag.o \ ice_ethtool.o \ - ice_repr.o + ice_repr.o \ + ice_tc_lib.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 0d44a3767bad..6bfbf704eddf 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -64,6 +64,7 @@ #include "ice_xsk.h" #include "ice_arfs.h" #include "ice_repr.h" +#include "ice_eswitch.h" #include "ice_lag.h" #define ICE_BAR0 0 @@ -400,6 +401,7 @@ enum ice_pf_flags { ICE_FLAG_PTP, /* PTP is enabled by software */ ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, + ICE_FLAG_CLS_FLOWER, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, ICE_FLAG_NO_MEDIA, @@ -512,6 +514,8 @@ struct ice_pf { int aux_idx; u32 sw_int_count; + struct hlist_head tc_flower_fltr_list; + __le64 nvm_phy_type_lo; /* NVM PHY type low */ __le64 nvm_phy_type_hi; /* NVM PHY type high */ struct ice_link_default_override_tlv link_dflt_override; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b3066cfca6b7..ceb0912e5850 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -20,6 +20,7 @@ #define CREATE_TRACE_POINTS #include "ice_trace.h" #include "ice_eswitch.h" +#include "ice_tc_lib.h" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -3107,6 +3108,9 @@ static void ice_set_netdev_features(struct net_device *netdev) /* enable features */ netdev->features |= netdev->hw_features; + + netdev->hw_features |= NETIF_F_HW_TC; + /* encap and VLAN devices inherit default, csumo and tso features */ netdev->hw_enc_features |= dflt_features | csumo_features | tso_features; @@ -7069,6 +7073,72 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) pf->tx_timeout_recovery_level++; } +/** + * ice_setup_tc_cls_flower - flower classifier offloads + * @np: net device to configure + * @filter_dev: device on which filter is added + * @cls_flower: offload data + */ +static int +ice_setup_tc_cls_flower(struct ice_netdev_priv *np, + struct net_device *filter_dev, + struct flow_cls_offload *cls_flower) +{ + struct ice_vsi *vsi = np->vsi; + + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return ice_add_cls_flower(filter_dev, vsi, cls_flower); + case FLOW_CLS_DESTROY: + return ice_del_cls_flower(vsi, cls_flower); + default: + return -EINVAL; + } +} + +/** + * ice_setup_tc_block_cb - callback handler registered for TC block + * @type: TC SETUP type + * @type_data: TC flower offload data that contains user input + * @cb_priv: netdev private data + */ +static int +ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct ice_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, np->vsi->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(ice_block_cb_list); + +static int +ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &ice_block_cb_list, + ice_setup_tc_block_cb, + np, np, true); + default: + return -EOPNOTSUPP; + } + return -EOPNOTSUPP; +} + /** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure @@ -7273,6 +7343,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_get_vf_stats = ice_get_vf_stats, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, + .ndo_setup_tc = ice_setup_tc, .ndo_set_features = ice_set_features, .ndo_bridge_getlink = ice_bridge_getlink, .ndo_bridge_setlink = ice_bridge_setlink, diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c new file mode 100644 index 000000000000..d2b6490efd6b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -0,0 +1,838 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_tc_lib.h" +#include "ice_lib.h" +#include "ice_fltr.h" + +/** + * ice_tc_count_lkups - determine lookup count for switch filter + * @flags: TC-flower flags + * @headers: Pointer to TC flower filter header structure + * @fltr: Pointer to outer TC filter structure + * + * Determine lookup count based on TC flower input for switch filter. + */ +static int +ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, + struct ice_tc_flower_fltr *fltr) +{ + int lkups_cnt = 0; + + if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) + lkups_cnt++; + + /* are MAC fields specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC)) + lkups_cnt++; + + /* is VLAN specified? */ + if (flags & ICE_TC_FLWR_FIELD_VLAN) + lkups_cnt++; + + /* are IPv[4|6] fields specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4)) + lkups_cnt++; + else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 | + ICE_TC_FLWR_FIELD_SRC_IPV6)) + lkups_cnt++; + + /* is L4 (TCP/UDP/any other L4 protocol fields) specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | + ICE_TC_FLWR_FIELD_SRC_L4_PORT)) + lkups_cnt++; + + return lkups_cnt; +} + +/** + * ice_tc_fill_rules - fill filter rules based on TC fltr + * @hw: pointer to HW structure + * @flags: tc flower field flags + * @tc_fltr: pointer to TC flower filter + * @list: list of advance rule elements + * @rule_info: pointer to information about rule + * @l4_proto: pointer to information such as L4 proto type + * + * Fill ice_adv_lkup_elem list based on TC flower flags and + * TC flower headers. This list should be used to add + * advance filter in hardware. + */ +static int +ice_tc_fill_rules(struct ice_hw *hw, u32 flags, + struct ice_tc_flower_fltr *tc_fltr, + struct ice_adv_lkup_elem *list, + struct ice_adv_rule_info *rule_info, + u16 *l4_proto) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + int i = 0; + + if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) { + list[i].type = ICE_ETYPE_OL; + list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto; + list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto; + i++; + } + + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_SRC_MAC)) { + struct ice_tc_l2_hdr *l2_key, *l2_mask; + + l2_key = &headers->l2_key; + l2_mask = &headers->l2_mask; + + list[i].type = ICE_MAC_OFOS; + if (flags & ICE_TC_FLWR_FIELD_DST_MAC) { + ether_addr_copy(list[i].h_u.eth_hdr.dst_addr, + l2_key->dst_mac); + ether_addr_copy(list[i].m_u.eth_hdr.dst_addr, + l2_mask->dst_mac); + } + if (flags & ICE_TC_FLWR_FIELD_SRC_MAC) { + ether_addr_copy(list[i].h_u.eth_hdr.src_addr, + l2_key->src_mac); + ether_addr_copy(list[i].m_u.eth_hdr.src_addr, + l2_mask->src_mac); + } + i++; + } + + /* copy VLAN info */ + if (flags & ICE_TC_FLWR_FIELD_VLAN) { + list[i].type = ICE_VLAN_OFOS; + list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id; + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF); + i++; + } + + /* copy L3 (IPv[4|6]: src, dest) address */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | + ICE_TC_FLWR_FIELD_SRC_IPV4)) { + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + list[i].type = ICE_IPV4_OFOS; + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) { + list[i].h_u.ipv4_hdr.dst_addr = l3_key->dst_ipv4; + list[i].m_u.ipv4_hdr.dst_addr = l3_mask->dst_ipv4; + } + if (flags & ICE_TC_FLWR_FIELD_SRC_IPV4) { + list[i].h_u.ipv4_hdr.src_addr = l3_key->src_ipv4; + list[i].m_u.ipv4_hdr.src_addr = l3_mask->src_ipv4; + } + i++; + } else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 | + ICE_TC_FLWR_FIELD_SRC_IPV6)) { + struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask; + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + list[i].type = ICE_IPV6_OFOS; + ipv6_hdr = &list[i].h_u.ipv6_hdr; + ipv6_mask = &list[i].m_u.ipv6_hdr; + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + + if (flags & ICE_TC_FLWR_FIELD_DEST_IPV6) { + memcpy(&ipv6_hdr->dst_addr, &l3_key->dst_ipv6_addr, + sizeof(l3_key->dst_ipv6_addr)); + memcpy(&ipv6_mask->dst_addr, &l3_mask->dst_ipv6_addr, + sizeof(l3_mask->dst_ipv6_addr)); + } + if (flags & ICE_TC_FLWR_FIELD_SRC_IPV6) { + memcpy(&ipv6_hdr->src_addr, &l3_key->src_ipv6_addr, + sizeof(l3_key->src_ipv6_addr)); + memcpy(&ipv6_mask->src_addr, &l3_mask->src_ipv6_addr, + sizeof(l3_mask->src_ipv6_addr)); + } + i++; + } + + /* copy L4 (src, dest) port */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | + ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { + struct ice_tc_l4_hdr *l4_key, *l4_mask; + + l4_key = &headers->l4_key; + l4_mask = &headers->l4_mask; + if (headers->l3_key.ip_proto == IPPROTO_TCP) { + list[i].type = ICE_TCP_IL; + /* detected L4 proto is TCP */ + if (l4_proto) + *l4_proto = IPPROTO_TCP; + } else if (headers->l3_key.ip_proto == IPPROTO_UDP) { + list[i].type = ICE_UDP_ILOS; + /* detected L4 proto is UDP */ + if (l4_proto) + *l4_proto = IPPROTO_UDP; + } + if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) { + list[i].h_u.l4_hdr.dst_port = l4_key->dst_port; + list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port; + } + if (flags & ICE_TC_FLWR_FIELD_SRC_L4_PORT) { + list[i].h_u.l4_hdr.src_port = l4_key->src_port; + list[i].m_u.l4_hdr.src_port = l4_mask->src_port; + } + i++; + } + + return i; +} + +static int +ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + struct ice_repr *repr; + + switch (act->id) { + case FLOW_ACTION_DROP: + fltr->action.fltr_act = ICE_DROP_PACKET; + break; + + case FLOW_ACTION_REDIRECT: + fltr->action.fltr_act = ICE_FWD_TO_VSI; + + if (ice_is_port_repr_netdev(act->dev)) { + repr = ice_netdev_to_repr(act->dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else if (netif_is_ice(act->dev)) { + struct ice_netdev_priv *np = netdev_priv(act->dev); + + fltr->dest_vsi = np->vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + break; + + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int +ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct ice_adv_rule_info rule_info = { 0 }; + struct ice_rule_query_data rule_added; + struct ice_hw *hw = &vsi->back->hw; + struct ice_adv_lkup_elem *list; + u32 flags = fltr->flags; + enum ice_status status; + int lkups_cnt; + int ret = 0; + int i; + + if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } + + lkups_cnt = ice_tc_count_lkups(flags, headers, fltr); + list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + i = ice_tc_fill_rules(hw, flags, fltr, list, &rule_info, NULL); + if (i != lkups_cnt) { + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = fltr->action.fltr_act; + if (fltr->action.fltr_act != ICE_DROP_PACKET) + rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx; + /* For now, making priority to be highest, and it also becomes + * the priority for recipe which will get created as a result of + * new extraction sequence based on input set. + * Priority '7' is max val for switch recipe, higher the number + * results into order of switch rule evaluation. + */ + rule_info.priority = 7; + + if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { + rule_info.sw_act.flag |= ICE_FLTR_RX; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + } else { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.rx = false; + } + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = fltr->cookie; + + status = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (status == ICE_ERR_ALREADY_EXISTS) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because it already exist"); + ret = -EINVAL; + goto exit; + } else if (status) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error"); + ret = -EIO; + goto exit; + } + + /* store the output params, which are needed later for removing + * advanced switch filter + */ + fltr->rid = rule_added.rid; + fltr->rule_id = rule_added.rule_id; + + if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { + if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid, + ICE_SINGLE_ACT_LAN_ENABLE)) + ice_rem_adv_rule_by_id(hw, &rule_added); + } + +exit: + kfree(list); + return ret; +} + +/** + * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + */ +static int +ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers) +{ + if (match->key->dst) { + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4; + headers->l3_key.dst_ipv4 = match->key->dst; + headers->l3_mask.dst_ipv4 = match->mask->dst; + } + if (match->key->src) { + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4; + headers->l3_key.src_ipv4 = match->key->src; + headers->l3_mask.src_ipv4 = match->mask->src; + } + return 0; +} + +/** + * ice_tc_set_ipv6 - Parse IPv6 addresses from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + */ +static int +ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers) +{ + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + /* src and dest IPV6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1), which can be represented as ::1 + */ + if (ipv6_addr_loopback(&match->key->dst) || + ipv6_addr_loopback(&match->key->src)) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad IPv6, addr is LOOPBACK"); + return -EINVAL; + } + /* if src/dest IPv6 address is *,* error */ + if (ipv6_addr_any(&match->mask->dst) && + ipv6_addr_any(&match->mask->src)) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any"); + return -EINVAL; + } + if (!ipv6_addr_any(&match->mask->dst)) + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6; + if (!ipv6_addr_any(&match->mask->src)) + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6; + + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + + if (fltr->flags & ICE_TC_FLWR_FIELD_SRC_IPV6) { + memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr, + sizeof(match->key->src.s6_addr)); + memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr, + sizeof(match->mask->src.s6_addr)); + } + if (fltr->flags & ICE_TC_FLWR_FIELD_DEST_IPV6) { + memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr, + sizeof(match->key->dst.s6_addr)); + memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr, + sizeof(match->mask->dst.s6_addr)); + } + + return 0; +} + +/** + * ice_tc_set_port - Parse ports from TC flower filter + * @match: Flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + */ +static int +ice_tc_set_port(struct flow_match_ports match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers) +{ + if (match.key->dst) { + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; + headers->l4_key.dst_port = match.key->dst; + headers->l4_mask.dst_port = match.mask->dst; + } + if (match.key->src) { + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; + headers->l4_key.src_port = match.key->src; + headers->l4_mask.src_port = match.mask->src; + } + return 0; +} + +/** + * ice_parse_cls_flower - Parse TC flower filters provided by kernel + * @vsi: Pointer to the VSI + * @filter_dev: Pointer to device on which filter is being added + * @f: Pointer to struct flow_cls_offload + * @fltr: Pointer to filter structure + */ +static int +ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, + struct flow_cls_offload *f, + struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; + struct flow_dissector *dissector; + + dissector = rule->match.dissector; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); + + if (n_proto_key == ETH_P_ALL || n_proto_key == 0) { + n_proto_key = 0; + n_proto_mask = 0; + } else { + fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID; + } + + headers->l2_key.n_proto = cpu_to_be16(n_proto_key); + headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask); + headers->l3_key.ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + + if (!is_zero_ether_addr(match.key->dst)) { + ether_addr_copy(headers->l2_key.dst_mac, + match.key->dst); + ether_addr_copy(headers->l2_mask.dst_mac, + match.mask->dst); + fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC; + } + + if (!is_zero_ether_addr(match.key->src)) { + ether_addr_copy(headers->l2_key.src_mac, + match.key->src); + ether_addr_copy(headers->l2_mask.src_mac, + match.mask->src); + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_MAC; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || + is_vlan_dev(filter_dev)) { + struct flow_dissector_key_vlan mask; + struct flow_dissector_key_vlan key; + struct flow_match_vlan match; + + if (is_vlan_dev(filter_dev)) { + match.key = &key; + match.key->vlan_id = vlan_dev_vlan_id(filter_dev); + match.key->vlan_priority = 0; + match.mask = &mask; + memset(match.mask, 0xff, sizeof(*match.mask)); + match.mask->vlan_priority = 0; + } else { + flow_rule_match_vlan(rule, &match); + } + + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { + fltr->flags |= ICE_TC_FLWR_FIELD_VLAN; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad VLAN mask"); + return -EINVAL; + } + } + + headers->vlan_hdr.vlan_id = + cpu_to_be16(match.key->vlan_id & VLAN_VID_MASK); + if (match.mask->vlan_priority) + headers->vlan_hdr.vlan_prio = match.key->vlan_priority; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + if (ice_tc_set_ipv4(&match, fltr, headers)) + return -EINVAL; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + if (ice_tc_set_ipv6(&match, fltr, headers)) + return -EINVAL; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + if (ice_tc_set_port(match, fltr, headers)) + return -EINVAL; + switch (headers->l3_key.ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Only UDP and TCP transport are supported"); + return -EINVAL; + } + } + return 0; +} + +/** + * ice_add_switch_fltr - Add TC flower filters + * @vsi: Pointer to VSI + * @fltr: Pointer to struct ice_tc_flower_fltr + * + * Add filter in HW switch block + */ +static int +ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + if (ice_is_eswitch_mode_switchdev(vsi->back)) + return ice_eswitch_add_tc_fltr(vsi, fltr); + + return -EOPNOTSUPP; +} + +/** + * ice_handle_tclass_action - Support directing to a traffic class + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Support directing traffic to a traffic class + */ +static int +ice_handle_tclass_action(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + + if (tc < 0) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); + return -EINVAL; + } + if (!tc) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of invalid destination"); + return -EINVAL; + } + + if (!(vsi->tc_cfg.ena_tc & BIT(tc))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); + return -EINVAL; + } + + /* Redirect to a TC class or Queue Group */ + fltr->action.fltr_act = ICE_FWD_TO_QGRP; + fltr->action.tc_class = tc; + + return 0; +} + +/** + * ice_parse_tc_flower_actions - Parse the actions for a TC filter + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Parse the actions for a TC filter + */ +static int +ice_parse_tc_flower_actions(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); + struct flow_action *flow_action = &rule->action; + struct flow_action_entry *act; + int i; + + if (cls_flower->classid) + return ice_handle_tclass_action(vsi, cls_flower, fltr); + + if (!flow_action_has_entries(flow_action)) + return -EINVAL; + + flow_action_for_each(i, act, flow_action) { + if (ice_is_eswitch_mode_switchdev(vsi->back)) { + int err = ice_eswitch_tc_parse_action(fltr, act); + + if (err) + return err; + continue; + } + /* Allow only one rule per filter */ + + /* Drop action */ + if (act->id == FLOW_ACTION_DROP) { + fltr->action.fltr_act = ICE_DROP_PACKET; + return 0; + } + fltr->action.fltr_act = ICE_FWD_TO_VSI; + } + return 0; +} + +/** + * ice_del_tc_fltr - deletes a filter from HW table + * @vsi: Pointer to VSI + * @fltr: Pointer to struct ice_tc_flower_fltr + * + * This function deletes a filter from HW table and manages book-keeping + */ +static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + struct ice_rule_query_data rule_rem; + struct ice_pf *pf = vsi->back; + int err; + + rule_rem.rid = fltr->rid; + rule_rem.rule_id = fltr->rule_id; + rule_rem.vsi_handle = fltr->dest_id; + err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem); + if (err) { + if (err == ICE_ERR_DOES_NOT_EXIST) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Filter does not exist"); + return -ENOENT; + } + NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to delete TC flower filter"); + return -EIO; + } + + return 0; +} + +/** + * ice_add_tc_fltr - adds a TC flower filter + * @netdev: Pointer to netdev + * @vsi: Pointer to VSI + * @f: Pointer to flower offload structure + * @__fltr: Pointer to struct ice_tc_flower_fltr + * + * This function parses TC-flower input fields, parses action, + * and adds a filter. + */ +static int +ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *f, + struct ice_tc_flower_fltr **__fltr) +{ + struct ice_tc_flower_fltr *fltr; + int err; + + /* by default, set output to be INVALID */ + *__fltr = NULL; + + fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); + if (!fltr) + return -ENOMEM; + + fltr->cookie = f->cookie; + fltr->extack = f->common.extack; + fltr->src_vsi = vsi; + INIT_HLIST_NODE(&fltr->tc_flower_node); + + err = ice_parse_cls_flower(netdev, vsi, f, fltr); + if (err < 0) + goto err; + + err = ice_parse_tc_flower_actions(vsi, f, fltr); + if (err < 0) + goto err; + + err = ice_add_switch_fltr(vsi, fltr); + if (err < 0) + goto err; + + /* return the newly created filter */ + *__fltr = fltr; + + return 0; +err: + kfree(fltr); + return err; +} + +/** + * ice_find_tc_flower_fltr - Find the TC flower filter in the list + * @pf: Pointer to PF + * @cookie: filter specific cookie + */ +static struct ice_tc_flower_fltr * +ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie) +{ + struct ice_tc_flower_fltr *fltr; + + hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node) + if (cookie == fltr->cookie) + return fltr; + + return NULL; +} + +/** + * ice_add_cls_flower - add TC flower filters + * @netdev: Pointer to filter device + * @vsi: Pointer to VSI + * @cls_flower: Pointer to flower offload structure + */ +int +ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower) +{ + struct netlink_ext_ack *extack = cls_flower->common.extack; + struct net_device *vsi_netdev = vsi->netdev; + struct ice_tc_flower_fltr *fltr; + struct ice_pf *pf = vsi->back; + int err; + + if (ice_is_reset_in_progress(pf->state)) + return -EBUSY; + if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + return -EINVAL; + + if (ice_is_port_repr_netdev(netdev)) + vsi_netdev = netdev; + + if (!(vsi_netdev->features & NETIF_F_HW_TC) && + !test_bit(ICE_FLAG_CLS_FLOWER, pf->flags)) { + /* Based on TC indirect notifications from kernel, all ice + * devices get an instance of rule from higher level device. + * Avoid triggering explicit error in this case. + */ + if (netdev == vsi_netdev) + NL_SET_ERR_MSG_MOD(extack, "can't apply TC flower filters, turn ON hw-tc-offload and try again"); + return -EINVAL; + } + + /* avoid duplicate entries, if exists - return error */ + fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); + if (fltr) { + NL_SET_ERR_MSG_MOD(extack, "filter cookie already exists, ignoring"); + return -EEXIST; + } + + /* prep and add TC-flower filter in HW */ + err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr); + if (err) + return err; + + /* add filter into an ordered list */ + hlist_add_head(&fltr->tc_flower_node, &pf->tc_flower_fltr_list); + return 0; +} + +/** + * ice_del_cls_flower - delete TC flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct flow_cls_offload + */ +int +ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower) +{ + struct ice_tc_flower_fltr *fltr; + struct ice_pf *pf = vsi->back; + int err; + + /* find filter */ + fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); + if (!fltr) { + if (hlist_empty(&pf->tc_flower_fltr_list)) + return 0; + + NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it"); + return -EINVAL; + } + + fltr->extack = cls_flower->common.extack; + /* delete filter from HW */ + err = ice_del_tc_fltr(vsi, fltr); + if (err) + return err; + + /* delete filter from an ordered list */ + hlist_del(&fltr->tc_flower_node); + + /* free the filter node */ + kfree(fltr); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h new file mode 100644 index 000000000000..883d2891f92c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_TC_LIB_H_ +#define _ICE_TC_LIB_H_ + +#define ICE_TC_FLWR_FIELD_DST_MAC BIT(0) +#define ICE_TC_FLWR_FIELD_SRC_MAC BIT(1) +#define ICE_TC_FLWR_FIELD_VLAN BIT(2) +#define ICE_TC_FLWR_FIELD_DEST_IPV4 BIT(3) +#define ICE_TC_FLWR_FIELD_SRC_IPV4 BIT(4) +#define ICE_TC_FLWR_FIELD_DEST_IPV6 BIT(5) +#define ICE_TC_FLWR_FIELD_SRC_IPV6 BIT(6) +#define ICE_TC_FLWR_FIELD_DEST_L4_PORT BIT(7) +#define ICE_TC_FLWR_FIELD_SRC_L4_PORT BIT(8) +#define ICE_TC_FLWR_FIELD_TENANT_ID BIT(9) +#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 BIT(10) +#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 BIT(11) +#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 BIT(12) +#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 BIT(13) +#define ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT BIT(14) +#define ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT BIT(15) +#define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16) +#define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) + +struct ice_tc_flower_action { + u32 tc_class; + enum ice_sw_fwd_act_type fltr_act; +}; + +struct ice_tc_vlan_hdr { + __be16 vlan_id; /* Only last 12 bits valid */ + u16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */ +}; + +struct ice_tc_l2_hdr { + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + __be16 n_proto; /* Ethernet Protocol */ +}; + +struct ice_tc_l3_hdr { + u8 ip_proto; /* IPPROTO value */ + union { + struct { + struct in_addr dst_ip; + struct in_addr src_ip; + } v4; + struct { + struct in6_addr dst_ip6; + struct in6_addr src_ip6; + } v6; + } ip; +#define dst_ipv6 ip.v6.dst_ip6.s6_addr32 +#define dst_ipv6_addr ip.v6.dst_ip6.s6_addr +#define src_ipv6 ip.v6.src_ip6.s6_addr32 +#define src_ipv6_addr ip.v6.src_ip6.s6_addr +#define dst_ipv4 ip.v4.dst_ip.s_addr +#define src_ipv4 ip.v4.src_ip.s_addr + + u8 tos; + u8 ttl; +}; + +struct ice_tc_l4_hdr { + __be16 dst_port; + __be16 src_port; +}; + +struct ice_tc_flower_lyr_2_4_hdrs { + /* L2 layer fields with their mask */ + struct ice_tc_l2_hdr l2_key; + struct ice_tc_l2_hdr l2_mask; + struct ice_tc_vlan_hdr vlan_hdr; + /* L3 (IPv4[6]) layer fields with their mask */ + struct ice_tc_l3_hdr l3_key; + struct ice_tc_l3_hdr l3_mask; + + /* L4 layer fields with their mask */ + struct ice_tc_l4_hdr l4_key; + struct ice_tc_l4_hdr l4_mask; +}; + +enum ice_eswitch_fltr_direction { + ICE_ESWITCH_FLTR_INGRESS, + ICE_ESWITCH_FLTR_EGRESS, +}; + +struct ice_tc_flower_fltr { + struct hlist_node tc_flower_node; + + /* cookie becomes filter_rule_id if rule is added successfully */ + unsigned long cookie; + + /* add_adv_rule returns information like recipe ID, rule_id. Store + * those values since they are needed to remove advanced rule + */ + u16 rid; + u16 rule_id; + /* this could be queue/vsi_idx (sw handle)/queue_group, depending upon + * destination type + */ + u16 dest_id; + /* if dest_id is vsi_idx, then need to store destination VSI ptr */ + struct ice_vsi *dest_vsi; + /* direction of fltr for eswitch use case */ + enum ice_eswitch_fltr_direction direction; + + /* Parsed TC flower configuration params */ + struct ice_tc_flower_lyr_2_4_hdrs outer_headers; + struct ice_tc_flower_lyr_2_4_hdrs inner_headers; + struct ice_vsi *src_vsi; + __be32 tenant_id; + u32 flags; + struct ice_tc_flower_action action; + + /* cache ptr which is used wherever needed to communicate netlink + * messages + */ + struct netlink_ext_ack *extack; +}; + +int +ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower); +int +ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); + +#endif /* _ICE_TC_LIB_H_ */ From 7fde6d8b445f3af9caa5f3c2be384c2f797e21c5 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 6 Aug 2021 10:49:06 +0200 Subject: [PATCH 090/440] ice: ndo_setup_tc implementation for PR Add tc-flower support for VF port representor devices. Implement ndo_setup_tc callback for TC HW offload on VF port representors devices. Implemented both methods: add and delete tc-flower flows. Mark NETIF_F_HW_TC bit in net device's feature set to enable offload TC infrastructure for port representor. Implement TC filters replay function required to restore filters settings while switchdev configuration is rebuilt. Signed-off-by: Michal Swiatkowski Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 3 ++ drivers/net/ethernet/intel/ice/ice_repr.c | 53 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 17 +++++++ drivers/net/ethernet/intel/ice/ice_tc_lib.h | 1 + 4 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 477e3f2d616d..d91a7834f91f 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -7,6 +7,7 @@ #include "ice_fltr.h" #include "ice_repr.h" #include "ice_devlink.h" +#include "ice_tc_lib.h" /** * ice_eswitch_setup_env - configure switchdev HW filters @@ -645,6 +646,8 @@ int ice_eswitch_rebuild(struct ice_pf *pf) ice_eswitch_remap_rings_to_vectors(pf); + ice_replay_tc_fltrs(pf); + status = ice_vsi_open(ctrl_vsi); if (status) return status; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index cb83f58d7c71..c49eeea7cb67 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -5,6 +5,7 @@ #include "ice_eswitch.h" #include "ice_devlink.h" #include "ice_virtchnl_pf.h" +#include "ice_tc_lib.h" /** * ice_repr_get_sw_port_id - get port ID associated with representor @@ -141,6 +142,55 @@ ice_repr_get_devlink_port(struct net_device *netdev) return &repr->vf->devlink_port; } +static int +ice_repr_setup_tc_cls_flower(struct ice_repr *repr, + struct flow_cls_offload *flower) +{ + switch (flower->command) { + case FLOW_CLS_REPLACE: + return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower); + case FLOW_CLS_DESTROY: + return ice_del_cls_flower(repr->src_vsi, flower); + default: + return -EINVAL; + } +} + +static int +ice_repr_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *flower = (struct flow_cls_offload *)type_data; + struct ice_netdev_priv *np = (struct ice_netdev_priv *)cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_repr_setup_tc_cls_flower(np->repr, flower); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(ice_repr_block_cb_list); + +static int +ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple((struct flow_block_offload *) + type_data, + &ice_repr_block_cb_list, + ice_repr_setup_tc_block_cb, + np, np, true); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops ice_repr_netdev_ops = { .ndo_get_phys_port_name = ice_repr_get_phys_port_name, .ndo_get_stats64 = ice_repr_get_stats64, @@ -148,6 +198,7 @@ static const struct net_device_ops ice_repr_netdev_ops = { .ndo_stop = ice_repr_stop, .ndo_start_xmit = ice_eswitch_port_start_xmit, .ndo_get_devlink_port = ice_repr_get_devlink_port, + .ndo_setup_tc = ice_repr_setup_tc, }; /** @@ -170,6 +221,8 @@ ice_repr_reg_netdev(struct net_device *netdev) netdev->netdev_ops = &ice_repr_netdev_ops; ice_set_ethtool_repr_ops(netdev); + netdev->hw_features |= NETIF_F_HW_TC; + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index d2b6490efd6b..4c1daa1a02a1 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -836,3 +836,20 @@ ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower) return 0; } + +/** + * ice_replay_tc_fltrs - replay TC filters + * @pf: pointer to PF struct + */ +void ice_replay_tc_fltrs(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + struct hlist_node *node; + + hlist_for_each_entry_safe(fltr, node, + &pf->tc_flower_fltr_list, + tc_flower_node) { + fltr->extack = NULL; + ice_add_switch_fltr(fltr->src_vsi, fltr); + } +} diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 883d2891f92c..d90e9e37ae25 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -125,5 +125,6 @@ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); int ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); +void ice_replay_tc_fltrs(struct ice_pf *pf); #endif /* _ICE_TC_LIB_H_ */ From 2cb67ab153d59473d67713c709a198922b7e87a3 Mon Sep 17 00:00:00 2001 From: Yangchun Fu Date: Mon, 11 Oct 2021 08:36:44 -0700 Subject: [PATCH 091/440] gve: Switch to use napi_complete_done Use napi_complete_done to allow for the use of gro_flush_timeout. Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support") Signed-off-by: Yangchun Fu Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 5 ++- drivers/net/ethernet/google/gve/gve_main.c | 38 +++++++++++++--------- drivers/net/ethernet/google/gve/gve_rx.c | 37 +++++++++++---------- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 2f93ed470590..4abd53bdde73 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -825,11 +825,10 @@ __be32 gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx); /* rx handling */ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); -bool gve_rx_poll(struct gve_notify_block *block, int budget); +int gve_rx_poll(struct gve_notify_block *block, int budget); +bool gve_rx_work_pending(struct gve_rx_ring *rx); int gve_rx_alloc_rings(struct gve_priv *priv); void gve_rx_free_rings_gqi(struct gve_priv *priv); -bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, - netdev_features_t feat); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5b5dcaaeed7f..b41679ab0dbe 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -192,34 +192,40 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) __be32 __iomem *irq_doorbell; bool reschedule = false; struct gve_priv *priv; + int work_done = 0; block = container_of(napi, struct gve_notify_block, napi); priv = block->priv; if (block->tx) reschedule |= gve_tx_poll(block, budget); - if (block->rx) - reschedule |= gve_rx_poll(block, budget); + if (block->rx) { + work_done = gve_rx_poll(block, budget); + reschedule |= work_done == budget; + } if (reschedule) return budget; - napi_complete(napi); - irq_doorbell = gve_irq_doorbell(priv, block); - iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); + /* Complete processing - don't unmask irq if busy polling is enabled */ + if (likely(napi_complete_done(napi, work_done))) { + irq_doorbell = gve_irq_doorbell(priv, block); + iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); - /* Double check we have no extra work. - * Ensure unmask synchronizes with checking for work. - */ - mb(); - if (block->tx) - reschedule |= gve_tx_poll(block, -1); - if (block->rx) - reschedule |= gve_rx_poll(block, -1); - if (reschedule && napi_reschedule(napi)) - iowrite32be(GVE_IRQ_MASK, irq_doorbell); + /* Double check we have no extra work. + * Ensure unmask synchronizes with checking for work. + */ + mb(); - return 0; + if (block->tx) + reschedule |= gve_tx_poll(block, -1); + if (block->rx) + reschedule |= gve_rx_work_pending(block->rx); + + if (reschedule && napi_reschedule(napi)) + iowrite32be(GVE_IRQ_MASK, irq_doorbell); + } + return work_done; } static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 94941d4e4744..3347879a4a5d 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -456,7 +456,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, return true; } -static bool gve_rx_work_pending(struct gve_rx_ring *rx) +bool gve_rx_work_pending(struct gve_rx_ring *rx) { struct gve_rx_desc *desc; __be16 flags_seq; @@ -524,8 +524,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) return true; } -bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, - netdev_features_t feat) +static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat) { struct gve_priv *priv = rx->gve; u32 work_done = 0, packets = 0; @@ -559,13 +559,15 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, } if (!work_done && rx->fill_cnt - cnt > rx->db_threshold) - return false; + return 0; - u64_stats_update_begin(&rx->statss); - rx->rpackets += packets; - rx->rbytes += bytes; - u64_stats_update_end(&rx->statss); - rx->cnt = cnt; + if (work_done) { + u64_stats_update_begin(&rx->statss); + rx->rpackets += packets; + rx->rbytes += bytes; + u64_stats_update_end(&rx->statss); + rx->cnt = cnt; + } /* restock ring slots */ if (!rx->data.raw_addressing) { @@ -576,26 +578,26 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, * falls below a threshold. */ if (!gve_rx_refill_buffers(priv, rx)) - return false; + return 0; /* If we were not able to completely refill buffers, we'll want * to schedule this queue for work again to refill buffers. */ if (rx->fill_cnt - cnt <= rx->db_threshold) { gve_rx_write_doorbell(priv, rx); - return true; + return budget; } } gve_rx_write_doorbell(priv, rx); - return gve_rx_work_pending(rx); + return work_done; } -bool gve_rx_poll(struct gve_notify_block *block, int budget) +int gve_rx_poll(struct gve_notify_block *block, int budget) { struct gve_rx_ring *rx = block->rx; netdev_features_t feat; - bool repoll = false; + int work_done = 0; feat = block->napi.dev->features; @@ -604,8 +606,7 @@ bool gve_rx_poll(struct gve_notify_block *block, int budget) budget = INT_MAX; if (budget > 0) - repoll |= gve_clean_rx_done(rx, budget, feat); - else - repoll |= gve_rx_work_pending(rx); - return repoll; + work_done = gve_clean_rx_done(rx, budget, feat); + + return work_done; } From 58401b2a46e7d5090873e7725ac786685475d638 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:45 -0700 Subject: [PATCH 092/440] gve: Add rx buffer pagecnt bias Add a pagecnt bias field to rx buffer info struct to eliminate needing to increment the atomic page ref count on every pass in the rx hotpath. Also prefetch two packet pages ahead. Fixes: ede3fcf5ec67f ("gve: Add support for raw addressing to the rx path") Signed-off-by: Yanchun Fu Signed-off-by: Nathan Lewis Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 52 +++++++++++++++++------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 3347879a4a5d..41b21b527470 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -16,19 +16,23 @@ static void gve_rx_free_buffer(struct device *dev, dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK); + page_ref_sub(page_info->page, page_info->pagecnt_bias - 1); gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE); } static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) { - if (rx->data.raw_addressing) { - u32 slots = rx->mask + 1; - int i; + u32 slots = rx->mask + 1; + int i; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], &rx->data.data_ring[i]); } else { + for (i = 0; i < slots; i++) + page_ref_sub(rx->data.page_info[i].page, + rx->data.page_info[i].pagecnt_bias - 1); gve_unassign_qpl(priv, rx->data.qpl->id); rx->data.qpl = NULL; } @@ -69,6 +73,9 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, page_info->page_offset = 0; page_info->page_address = page_address(page); *slot_addr = cpu_to_be64(addr); + /* The page already has 1 ref */ + page_ref_add(page, INT_MAX - 1); + page_info->pagecnt_bias = INT_MAX; } static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, @@ -299,17 +306,18 @@ static bool gve_rx_can_flip_buffers(struct net_device *netdev) ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false; } -static int gve_rx_can_recycle_buffer(struct page *page) +static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info) { - int pagecount = page_count(page); + int pagecount = page_count(page_info->page); /* This page is not being used by any SKBs - reuse */ - if (pagecount == 1) + if (pagecount == page_info->pagecnt_bias) return 1; /* This page is still being used by an SKB - we can't reuse */ - else if (pagecount >= 2) + else if (pagecount > page_info->pagecnt_bias) return 0; - WARN(pagecount < 1, "Pagecount should never be < 1"); + WARN(pagecount < page_info->pagecnt_bias, + "Pagecount should never be less than the bias."); return -1; } @@ -325,11 +333,11 @@ gve_rx_raw_addressing(struct device *dev, struct net_device *netdev, if (!skb) return NULL; - /* Optimistically stop the kernel from freeing the page by increasing - * the page bias. We will check the refcount in refill to determine if - * we need to alloc a new page. + /* Optimistically stop the kernel from freeing the page. + * We will check again in refill to determine if we need to alloc a + * new page. */ - get_page(page_info->page); + gve_dec_pagecnt_bias(page_info); return skb; } @@ -352,7 +360,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, /* No point in recycling if we didn't get the skb */ if (skb) { /* Make sure that the page isn't freed. */ - get_page(page_info->page); + gve_dec_pagecnt_bias(page_info); gve_rx_flip_buff(page_info, &data_slot->qpl_offset); } } else { @@ -376,8 +384,18 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, union gve_rx_data_slot *data_slot; struct sk_buff *skb = NULL; dma_addr_t page_bus; + void *va; u16 len; + /* Prefetch two packet pages ahead, we will need it soon. */ + page_info = &rx->data.page_info[(idx + 2) & rx->mask]; + va = page_info->page_address + GVE_RX_PAD + + page_info->page_offset; + + prefetch(page_info->page); /* Kernel page struct. */ + prefetch(va); /* Packet header. */ + prefetch(va + 64); /* Next cacheline too. */ + /* drop this packet */ if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) { u64_stats_update_begin(&rx->statss); @@ -408,7 +426,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, int recycle = 0; if (can_flip) { - recycle = gve_rx_can_recycle_buffer(page_info->page); + recycle = gve_rx_can_recycle_buffer(page_info); if (recycle < 0) { if (!rx->data.raw_addressing) gve_schedule_reset(priv); @@ -499,7 +517,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) * owns half the page it is impossible to tell which half. Either * the whole page is free or it needs to be replaced. */ - int recycle = gve_rx_can_recycle_buffer(page_info->page); + int recycle = gve_rx_can_recycle_buffer(page_info); if (recycle < 0) { if (!rx->data.raw_addressing) @@ -546,6 +564,10 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, "[%d] seqno=%d rx->desc.seqno=%d\n", rx->q_num, GVE_SEQNO(desc->flags_seq), rx->desc.seqno); + + /* prefetch two descriptors ahead */ + prefetch(rx->desc.desc_ring + ((cnt + 2) & rx->mask)); + dropped = !gve_rx(rx, desc, feat, idx); if (!dropped) { bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; From 61d72c7e486b63340f1fadcf3bed4cae98f03d9b Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 11 Oct 2021 08:36:46 -0700 Subject: [PATCH 093/440] gve: Do lazy cleanup in TX path When TX queue is full, attemt to process enough TX completions to avoid stalling the queue. Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support") Signed-off-by: Tao Liu Signed-off-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 9 +- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +- drivers/net/ethernet/google/gve/gve_main.c | 6 +- drivers/net/ethernet/google/gve/gve_tx.c | 92 +++++++++++-------- 4 files changed, 61 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4abd53bdde73..3de561e22659 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -341,8 +341,8 @@ struct gve_tx_ring { union { /* GQI fields */ struct { - /* NIC tail pointer */ - __be32 last_nic_done; + /* Spinlock for when cleanup in progress */ + spinlock_t clean_lock; }; /* DQO fields. */ @@ -821,8 +821,9 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); bool gve_tx_poll(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings(struct gve_priv *priv); void gve_tx_free_rings_gqi(struct gve_priv *priv); -__be32 gve_tx_load_event_counter(struct gve_priv *priv, - struct gve_tx_ring *tx); +u32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx); +bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); /* rx handling */ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 716e6240305d..618a3e1d858e 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -330,8 +330,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; data[i++] = tx->stop_queue; - data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv, - tx)); + data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ if (skip_nic_stats) { diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index b41679ab0dbe..b6805ad2011b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -212,13 +212,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) irq_doorbell = gve_irq_doorbell(priv, block); iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); - /* Double check we have no extra work. - * Ensure unmask synchronizes with checking for work. + /* Ensure IRQ ACK is visible before we check pending work. + * If queue had issued updates, it would be truly visible. */ mb(); if (block->tx) - reschedule |= gve_tx_poll(block, -1); + reschedule |= gve_tx_clean_pending(priv, block->tx); if (block->rx) reschedule |= gve_rx_work_pending(block->rx); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 9922ce46a635..a9cb241fedf4 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -144,7 +144,7 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - gve_clean_tx_done(priv, tx, tx->req, false); + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); netdev_tx_reset_queue(tx->netdev_txq); dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -176,6 +176,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) /* Make sure everything is zeroed to start */ memset(tx, 0, sizeof(*tx)); + spin_lock_init(&tx->clean_lock); tx->q_num = idx; tx->mask = slots - 1; @@ -328,10 +329,16 @@ static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required) return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc); } +static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED); + /* Stops the queue if the skb cannot be transmitted. */ -static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) +static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx, + struct sk_buff *skb) { int bytes_required = 0; + u32 nic_done; + u32 to_do; + int ret; if (!tx->raw_addressing) bytes_required = gve_skb_fifo_bytes_required(tx, skb); @@ -339,29 +346,28 @@ static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) if (likely(gve_can_tx(tx, bytes_required))) return 0; - /* No space, so stop the queue */ - tx->stop_queue++; - netif_tx_stop_queue(tx->netdev_txq); - smp_mb(); /* sync with restarting queue in gve_clean_tx_done() */ + ret = -EBUSY; + spin_lock(&tx->clean_lock); + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = nic_done - tx->done; - /* Now check for resources again, in case gve_clean_tx_done() freed - * resources after we checked and we stopped the queue after - * gve_clean_tx_done() checked. - * - * gve_maybe_stop_tx() gve_clean_tx_done() - * nsegs/can_alloc test failed - * gve_tx_free_fifo() - * if (tx queue stopped) - * netif_tx_queue_wake() - * netif_tx_stop_queue() - * Need to check again for space here! - */ - if (likely(!gve_can_tx(tx, bytes_required))) - return -EBUSY; + /* Only try to clean if there is hope for TX */ + if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) { + if (to_do > 0) { + to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT); + gve_clean_tx_done(priv, tx, to_do, false); + } + if (likely(gve_can_tx(tx, bytes_required))) + ret = 0; + } + if (ret) { + /* No space, so stop the queue */ + tx->stop_queue++; + netif_tx_stop_queue(tx->netdev_txq); + } + spin_unlock(&tx->clean_lock); - netif_tx_start_queue(tx->netdev_txq); - tx->wake_queue++; - return 0; + return ret; } static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, @@ -576,7 +582,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues, "skb queue index out of range"); tx = &priv->tx[skb_get_queue_mapping(skb)]; - if (unlikely(gve_maybe_stop_tx(tx, skb))) { + if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) { /* We need to ring the txq doorbell -- we have stopped the Tx * queue for want of resources, but prior calls to gve_tx() * may have added descriptors without ringing the doorbell. @@ -672,19 +678,19 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, return pkts; } -__be32 gve_tx_load_event_counter(struct gve_priv *priv, - struct gve_tx_ring *tx) +u32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx) { - u32 counter_index = be32_to_cpu((tx->q_resources->counter_index)); + u32 counter_index = be32_to_cpu(tx->q_resources->counter_index); + __be32 counter = READ_ONCE(priv->counter_array[counter_index]); - return READ_ONCE(priv->counter_array[counter_index]); + return be32_to_cpu(counter); } bool gve_tx_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; struct gve_tx_ring *tx = block->tx; - bool repoll = false; u32 nic_done; u32 to_do; @@ -692,17 +698,23 @@ bool gve_tx_poll(struct gve_notify_block *block, int budget) if (budget == 0) budget = INT_MAX; + /* In TX path, it may try to clean completed pkts in order to xmit, + * to avoid cleaning conflict, use spin_lock(), it yields better + * concurrency between xmit/clean than netif's lock. + */ + spin_lock(&tx->clean_lock); /* Find out how much work there is to be done */ - tx->last_nic_done = gve_tx_load_event_counter(priv, tx); - nic_done = be32_to_cpu(tx->last_nic_done); - if (budget > 0) { - /* Do as much work as we have that the budget will - * allow - */ - to_do = min_t(u32, (nic_done - tx->done), budget); - gve_clean_tx_done(priv, tx, to_do, true); - } + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_tx_done(priv, tx, to_do, true); + spin_unlock(&tx->clean_lock); /* If we still have work we want to repoll */ - repoll |= (nic_done != tx->done); - return repoll; + return nic_done != tx->done; +} + +bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx) +{ + u32 nic_done = gve_tx_load_event_counter(priv, tx); + + return nic_done != tx->done; } From 87a7f321bb6a45e54b7d6c90d032ee5636a6ad97 Mon Sep 17 00:00:00 2001 From: John Fraker Date: Mon, 11 Oct 2021 08:36:47 -0700 Subject: [PATCH 094/440] gve: Recover from queue stall due to missed IRQ Don't always reset the driver on a TX timeout. Attempt to recover by kicking the queue in case an IRQ was missed. Fixes: 9e5f7d26a4c08 ("gve: Add workqueue and reset support") Signed-off-by: John Fraker Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 4 +- drivers/net/ethernet/google/gve/gve_adminq.h | 1 + drivers/net/ethernet/google/gve/gve_main.c | 48 +++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 3de561e22659..51ed8fe71d2d 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -30,7 +30,7 @@ #define GVE_MIN_MSIX 3 /* Numbers of gve tx/rx stats in stats report. */ -#define GVE_TX_STATS_REPORT_NUM 5 +#define GVE_TX_STATS_REPORT_NUM 6 #define GVE_RX_STATS_REPORT_NUM 2 /* Interval to schedule a stats report update, 20000ms. */ @@ -413,7 +413,9 @@ struct gve_tx_ring { u32 q_num ____cacheline_aligned; /* queue idx */ u32 stop_queue; /* count of queue stops */ u32 wake_queue; /* count of queue wakes */ + u32 queue_timeout; /* count of queue timeouts */ u32 ntfy_id; /* notification block index */ + u32 last_kick_msec; /* Last time the queue was kicked */ dma_addr_t bus; /* dma address of the descr ring */ dma_addr_t q_resources_bus; /* dma address of the queue resources */ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 47c3d8f313fc..3953f6f7a427 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -270,6 +270,7 @@ enum gve_stat_names { TX_LAST_COMPLETION_PROCESSED = 5, RX_NEXT_EXPECTED_SEQUENCE = 6, RX_BUFFERS_POSTED = 7, + TX_TIMEOUT_CNT = 8, // stats from NIC RX_QUEUE_DROP_CNT = 65, RX_NO_BUFFERS_POSTED = 66, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index b6805ad2011b..7647cd05b1d2 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -24,6 +24,9 @@ #define GVE_VERSION "1.0.0" #define GVE_VERSION_PREFIX "GVE-" +// Minimum amount of time between queue kicks in msec (10 seconds) +#define MIN_TX_TIMEOUT_GAP (1000 * 10) + const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -1121,9 +1124,47 @@ static void gve_turnup(struct gve_priv *priv) static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { - struct gve_priv *priv = netdev_priv(dev); + struct gve_notify_block *block; + struct gve_tx_ring *tx = NULL; + struct gve_priv *priv; + u32 last_nic_done; + u32 current_time; + u32 ntfy_idx; + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + if (txqueue > priv->tx_cfg.num_queues) + goto reset; + + ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); + if (ntfy_idx > priv->num_ntfy_blks) + goto reset; + + block = &priv->ntfy_blocks[ntfy_idx]; + tx = block->tx; + + current_time = jiffies_to_msecs(jiffies); + if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + goto reset; + + /* Check to see if there are missed completions, which will allow us to + * kick the queue. + */ + last_nic_done = gve_tx_load_event_counter(priv, tx); + if (last_nic_done - tx->done) { + netdev_info(dev, "Kicking queue %d", txqueue); + iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); + napi_schedule(&block->napi); + tx->last_kick_msec = current_time; + goto out; + } // Else reset. + +reset: gve_schedule_reset(priv); + +out: + if (tx) + tx->queue_timeout++; priv->tx_timeo_cnt++; } @@ -1252,6 +1293,11 @@ void gve_handle_report_stats(struct gve_priv *priv) .value = cpu_to_be64(last_completion), .queue_id = cpu_to_be32(idx), }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), + .value = cpu_to_be64(priv->tx[idx].queue_timeout), + .queue_id = cpu_to_be32(idx), + }; } } /* rx stats */ From 4edf8249bcd1c554b1a34166ca4ea5630031ecf5 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:48 -0700 Subject: [PATCH 095/440] gve: Add netif_set_xps_queue call Configure XPS when adding tx queues to the notification blocks. Fixes: dbdaa67540512 ("gve: Move some static functions to a common file") Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 93f3dcbeeea9..45ff7a9ab5f9 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -18,12 +18,16 @@ void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) { + unsigned int active_cpus = min_t(int, priv->num_ntfy_blks / 2, + num_online_cpus()); int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; struct gve_tx_ring *tx = &priv->tx[queue_idx]; block->tx = tx; tx->ntfy_id = ntfy_idx; + netif_set_xps_queue(priv->dev, get_cpu_mask(ntfy_idx % active_cpus), + queue_idx); } void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) From ea5d3455adf1f3d47130604e6d353b23c3ee4a50 Mon Sep 17 00:00:00 2001 From: Jordan Kim Date: Mon, 11 Oct 2021 08:36:49 -0700 Subject: [PATCH 096/440] gve: Allow pageflips on larger pages Half pages are just used for small enough packets. This change allows this to also apply for systems with pages larger than 4 KB. Fixes: 02b0e0c18ba75 ("gve: Rx Buffer Recycling") Signed-off-by: Jordan Kim Signed-off-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 41b21b527470..98ba981cd534 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -302,7 +302,7 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *sl static bool gve_rx_can_flip_buffers(struct net_device *netdev) { - return PAGE_SIZE == 4096 + return PAGE_SIZE >= 4096 ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false; } From 1b4d1c9bab091ac6e20a3ff80c30c5cefe192bf4 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:50 -0700 Subject: [PATCH 097/440] gve: Track RX buffer allocation failures The rx_buf_alloc_fail counter wasn't getting updated. Fixes: 433e274b8f7b0 ("gve: Add stats for gve.") Signed-off-by: Catherine Sullivan Signed-off-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 98ba981cd534..95bc4d8a1811 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -532,8 +532,13 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) gve_rx_free_buffer(dev, page_info, data_slot); page_info->page = NULL; - if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot)) + if (gve_rx_alloc_buffer(priv, dev, page_info, + data_slot)) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); break; + } } } fill_cnt++; From 5b25a5bf5e047745c598d55833ada8889af28989 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:29 +0200 Subject: [PATCH 098/440] nfc: drop unneeded debug prints ftrace is a preferred and standard way to debug entering and exiting functions so drop useless debug prints. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- net/nfc/hci/command.c | 16 ---------------- net/nfc/hci/llc_shdlc.c | 12 ------------ net/nfc/llcp_commands.c | 8 -------- net/nfc/llcp_core.c | 5 +---- net/nfc/nci/core.c | 4 ---- net/nfc/nci/hci.c | 4 ---- net/nfc/nci/ntf.c | 9 --------- 7 files changed, 1 insertion(+), 57 deletions(-) diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 3a89bd9b89fc..af6bacb3ba98 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -114,8 +114,6 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; - pr_debug("\n"); - pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -130,8 +128,6 @@ int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; - pr_debug("\n"); - pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -205,8 +201,6 @@ static int nfc_hci_open_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_close_pipe(struct nfc_hci_dev *hdev, u8 pipe) { - pr_debug("\n"); - return nfc_hci_execute_cmd(hdev, pipe, NFC_HCI_ANY_CLOSE_PIPE, NULL, 0, NULL); } @@ -242,8 +236,6 @@ static u8 nfc_hci_create_pipe(struct nfc_hci_dev *hdev, u8 dest_host, static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) { - pr_debug("\n"); - return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, NFC_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } @@ -256,8 +248,6 @@ static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) /* TODO: Find out what the identity reference data is * and fill param with it. HCI spec 6.1.3.5 */ - pr_debug("\n"); - if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) param_len = 0; @@ -271,8 +261,6 @@ int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) int r; u8 pipe = hdev->gate2pipe[gate]; - pr_debug("\n"); - if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -296,8 +284,6 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) { int r; - pr_debug("\n"); - r = nfc_hci_clear_all_pipes(hdev); if (r < 0) return r; @@ -314,8 +300,6 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, bool pipe_created = false; int r; - pr_debug("\n"); - if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 71e10347e6a9..e90f70385813 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -363,8 +363,6 @@ static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; - pr_debug("\n"); - skb = llc_shdlc_alloc_skb(shdlc, 2); if (skb == NULL) return -ENOMEM; @@ -379,8 +377,6 @@ static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; - pr_debug("\n"); - skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb == NULL) return -ENOMEM; @@ -570,8 +566,6 @@ static void llc_shdlc_connect_timeout(struct timer_list *t) { struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer); - pr_debug("\n"); - schedule_work(&shdlc->sm_work); } @@ -598,8 +592,6 @@ static void llc_shdlc_sm_work(struct work_struct *work) struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work); int r; - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); switch (shdlc->state) { @@ -681,8 +673,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_CONNECTING; @@ -701,8 +691,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) { - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 3c4172a5aeb5..41e3a20c8935 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -337,8 +337,6 @@ int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) struct nfc_dev *dev; struct nfc_llcp_local *local; - pr_debug("Sending DISC\n"); - local = sock->local; if (local == NULL) return -ENODEV; @@ -362,8 +360,6 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) struct nfc_llcp_local *local; u16 size = 0; - pr_debug("Sending SYMM\n"); - local = nfc_llcp_find_local(dev); if (local == NULL) return -ENODEV; @@ -399,8 +395,6 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; - pr_debug("Sending CONNECT\n"); - local = sock->local; if (local == NULL) return -ENODEV; @@ -475,8 +469,6 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; - pr_debug("Sending CC\n"); - local = sock->local; if (local == NULL) return -ENODEV; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index eaeb2b1cfa6a..5ad5157aa9c5 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -45,8 +45,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local = sock->local; struct sk_buff *s, *tmp; - pr_debug("%p\n", &sock->sk); - skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); @@ -1505,9 +1503,8 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; - pr_debug("Received an LLCP PDU\n"); if (err < 0) { - pr_err("err %d\n", err); + pr_err("LLCP PDU receive err %d\n", err); return; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 82ab39d80726..6fd873aa86be 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -930,8 +930,6 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; - pr_debug("entry\n"); - if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; @@ -977,8 +975,6 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; - pr_debug("entry\n"); - if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index e199912ee1e5..19703a649b5a 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -432,8 +432,6 @@ void nci_hci_data_received_cb(void *context, struct sk_buff *frag_skb; int msg_len; - pr_debug("\n"); - if (err) { nci_req_complete(ndev, err); return; @@ -547,8 +545,6 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { - pr_debug("\n"); - return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index c5eacaac41ae..282c51051dcc 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -738,8 +738,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, const struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; - pr_debug("\n"); - /* NFCForum NCI 9.2.1 HCI Network Specific Handling * If the NFCC supports the HCI Network, it SHALL return one, * and only one, NFCEE_DISCOVER_NTF with a Protocol type of @@ -751,12 +749,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } -static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, - const struct sk_buff *skb) -{ - pr_debug("\n"); -} - void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -813,7 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; case NCI_OP_RF_NFCEE_ACTION_NTF: - nci_nfcee_action_ntf_packet(ndev, skb); break; default: From f141cfe364ef58d5f34ac0bc28518383889add49 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:30 +0200 Subject: [PATCH 099/440] nfc: nci: replace GPLv2 boilerplate with SPDX Replace standard GPLv2 license text with SPDX tag. Although the comment mentions GPLv2-only, it refers to the full license file which allows later GPL versions. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- net/nfc/nci/uart.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 502e7a3f8948..57500c262cc3 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2015, Marvell International Ltd. * - * This software file (the "File") is distributed by Marvell International - * Ltd. under the terms of the GNU General Public License Version 2, June 1991 - * (the "License"). You may use, redistribute and/or modify this File in - * accordance with the terms and conditions of the License, a copy of which - * is available on the worldwide web at - * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. - * - * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE - * ARE EXPRESSLY DISCLAIMED. The License provides additional details about - * this warranty disclaimer. - */ - -/* Inspired (hugely) by HCI LDISC implementation in Bluetooth. + * Inspired (hugely) by HCI LDISC implementation in Bluetooth. * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky From edfa5366ef424f46a20ab81d9927be2e8ff3f7ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:31 +0200 Subject: [PATCH 100/440] nfc: s3fwrn5: simplify dereferencing pointer to struct device Simplify the code dereferencing several pointers to reach the struct device. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/s3fwrn5/firmware.c | 29 +++++++++++------------------ drivers/nfc/s3fwrn5/nci.c | 18 +++++++----------- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 1af7a1e632cf..c20fdbac51c5 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -357,6 +357,7 @@ s3fwrn5_fw_is_custom(const struct s3fwrn5_fw_cmd_get_bootinfo_rsp *bootinfo) int s3fwrn5_fw_setup(struct s3fwrn5_fw_info *fw_info) { + struct device *dev = &fw_info->ndev->nfc_dev->dev; struct s3fwrn5_fw_cmd_get_bootinfo_rsp bootinfo; int ret; @@ -364,8 +365,7 @@ int s3fwrn5_fw_setup(struct s3fwrn5_fw_info *fw_info) ret = s3fwrn5_fw_get_bootinfo(fw_info, &bootinfo); if (ret < 0) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Failed to get bootinfo, ret=%02x\n", ret); + dev_err(dev, "Failed to get bootinfo, ret=%02x\n", ret); goto err; } @@ -373,8 +373,7 @@ int s3fwrn5_fw_setup(struct s3fwrn5_fw_info *fw_info) ret = s3fwrn5_fw_get_base_addr(&bootinfo, &fw_info->base_addr); if (ret < 0) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Unknown hardware version\n"); + dev_err(dev, "Unknown hardware version\n"); goto err; } @@ -409,6 +408,7 @@ bool s3fwrn5_fw_check_version(const struct s3fwrn5_fw_info *fw_info, u32 version int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) { + struct device *dev = &fw_info->ndev->nfc_dev->dev; struct s3fwrn5_fw_image *fw = &fw_info->fw; u8 hash_data[SHA1_DIGEST_SIZE]; struct crypto_shash *tfm; @@ -421,8 +421,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) tfm = crypto_alloc_shash("sha1", 0, 0); if (IS_ERR(tfm)) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%pe)\n", tfm); + dev_err(dev, "Cannot allocate shash (code=%pe)\n", tfm); return PTR_ERR(tfm); } @@ -430,21 +429,18 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) crypto_free_shash(tfm); if (ret) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot compute hash (code=%d)\n", ret); + dev_err(dev, "Cannot compute hash (code=%d)\n", ret); return ret; } /* Firmware update process */ - dev_info(&fw_info->ndev->nfc_dev->dev, - "Firmware update: %s\n", fw_info->fw_name); + dev_info(dev, "Firmware update: %s\n", fw_info->fw_name); ret = s3fwrn5_fw_enter_update_mode(fw_info, hash_data, SHA1_DIGEST_SIZE, fw_info->sig, fw_info->sig_size); if (ret < 0) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Unable to enter update mode\n"); + dev_err(dev, "Unable to enter update mode\n"); return ret; } @@ -452,21 +448,18 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) ret = s3fwrn5_fw_update_sector(fw_info, fw_info->base_addr + off, fw->image + off); if (ret < 0) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Firmware update error (code=%d)\n", ret); + dev_err(dev, "Firmware update error (code=%d)\n", ret); return ret; } } ret = s3fwrn5_fw_complete_update_mode(fw_info); if (ret < 0) { - dev_err(&fw_info->ndev->nfc_dev->dev, - "Unable to complete update mode\n"); + dev_err(dev, "Unable to complete update mode\n"); return ret; } - dev_info(&fw_info->ndev->nfc_dev->dev, - "Firmware update: success\n"); + dev_info(dev, "Firmware update: success\n"); return ret; } diff --git a/drivers/nfc/s3fwrn5/nci.c b/drivers/nfc/s3fwrn5/nci.c index e374e670b36b..ca6828f55ba0 100644 --- a/drivers/nfc/s3fwrn5/nci.c +++ b/drivers/nfc/s3fwrn5/nci.c @@ -47,6 +47,7 @@ const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = { int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) { + struct device *dev = &info->ndev->nfc_dev->dev; const struct firmware *fw; struct nci_prop_fw_cfg_cmd fw_cfg; struct nci_prop_set_rfreg_cmd set_rfreg; @@ -55,7 +56,7 @@ int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) int i, len; int ret; - ret = request_firmware(&fw, fw_name, &info->ndev->nfc_dev->dev); + ret = request_firmware(&fw, fw_name, dev); if (ret < 0) return ret; @@ -77,13 +78,11 @@ int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) /* Start rfreg configuration */ - dev_info(&info->ndev->nfc_dev->dev, - "rfreg configuration update: %s\n", fw_name); + dev_info(dev, "rfreg configuration update: %s\n", fw_name); ret = nci_prop_cmd(info->ndev, NCI_PROP_START_RFREG, 0, NULL); if (ret < 0) { - dev_err(&info->ndev->nfc_dev->dev, - "Unable to start rfreg update\n"); + dev_err(dev, "Unable to start rfreg update\n"); goto out; } @@ -97,8 +96,7 @@ int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) ret = nci_prop_cmd(info->ndev, NCI_PROP_SET_RFREG, len+1, (__u8 *)&set_rfreg); if (ret < 0) { - dev_err(&info->ndev->nfc_dev->dev, - "rfreg update error (code=%d)\n", ret); + dev_err(dev, "rfreg update error (code=%d)\n", ret); goto out; } set_rfreg.index++; @@ -110,13 +108,11 @@ int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) ret = nci_prop_cmd(info->ndev, NCI_PROP_STOP_RFREG, sizeof(stop_rfreg), (__u8 *)&stop_rfreg); if (ret < 0) { - dev_err(&info->ndev->nfc_dev->dev, - "Unable to stop rfreg update\n"); + dev_err(dev, "Unable to stop rfreg update\n"); goto out; } - dev_info(&info->ndev->nfc_dev->dev, - "rfreg configuration update: success\n"); + dev_info(dev, "rfreg configuration update: success\n"); out: release_firmware(fw); return ret; From 84910319fad4a511602236ea7b0c1c510132d895 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:32 +0200 Subject: [PATCH 101/440] nfc: st-nci: drop unneeded debug prints ftrace is a preferred and standard way to debug entering and exiting functions so drop useless debug prints. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/st-nci/i2c.c | 4 ---- drivers/nfc/st-nci/ndlc.c | 4 ---- drivers/nfc/st-nci/se.c | 6 ------ drivers/nfc/st-nci/spi.c | 4 ---- 4 files changed, 18 deletions(-) diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index ccf6152ebb9f..cbd968f013c7 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -157,7 +157,6 @@ static int st_nci_i2c_read(struct st_nci_i2c_phy *phy, static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) { struct st_nci_i2c_phy *phy = phy_id; - struct i2c_client *client; struct sk_buff *skb = NULL; int r; @@ -166,9 +165,6 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_NONE; } - client = phy->i2c_dev; - dev_dbg(&client->dev, "IRQ\n"); - if (phy->ndlc->hard_fault) return IRQ_HANDLED; diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index e9dc313b333e..755460a73c0d 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -239,8 +239,6 @@ static void ndlc_t1_timeout(struct timer_list *t) { struct llt_ndlc *ndlc = from_timer(ndlc, t, t1_timer); - pr_debug("\n"); - schedule_work(&ndlc->sm_work); } @@ -248,8 +246,6 @@ static void ndlc_t2_timeout(struct timer_list *t) { struct llt_ndlc *ndlc = from_timer(ndlc, t, t2_timer); - pr_debug("\n"); - schedule_work(&ndlc->sm_work); } diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 5fd89f72969d..7764b1a4c3cf 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -638,8 +638,6 @@ int st_nci_se_io(struct nci_dev *ndev, u32 se_idx, { struct st_nci_info *info = nci_get_drvdata(ndev); - pr_debug("\n"); - switch (se_idx) { case ST_NCI_ESE_HOST_ID: info->se_info.cb = cb; @@ -671,8 +669,6 @@ static void st_nci_se_wt_timeout(struct timer_list *t) u8 param = 0x01; struct st_nci_info *info = from_timer(info, t, se_info.bwi_timer); - pr_debug("\n"); - info->se_info.bwi_active = false; if (!info->se_info.xch_error) { @@ -692,8 +688,6 @@ static void st_nci_se_activation_timeout(struct timer_list *t) struct st_nci_info *info = from_timer(info, t, se_info.se_active_timer); - pr_debug("\n"); - info->se_info.se_active = false; complete(&info->se_info.req_completion); diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 0875b773fb41..4e723992e74c 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -169,7 +169,6 @@ static int st_nci_spi_read(struct st_nci_spi_phy *phy, static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) { struct st_nci_spi_phy *phy = phy_id; - struct spi_device *dev; struct sk_buff *skb = NULL; int r; @@ -178,9 +177,6 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_NONE; } - dev = phy->spi_dev; - dev_dbg(&dev->dev, "IRQ\n"); - if (phy->ndlc->hard_fault) return IRQ_HANDLED; From e52cc2a625a6841cad9a8a1fb537a0125cead3a9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:33 +0200 Subject: [PATCH 102/440] nfc: st21nfca: drop unneeded debug prints ftrace is a preferred and standard way to debug entering and exiting functions so drop useless debug prints. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/st21nfca/i2c.c | 4 ---- drivers/nfc/st21nfca/se.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 279d88128b2e..f126ce96a7df 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -421,7 +421,6 @@ static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy, static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) { struct st21nfca_i2c_phy *phy = phy_id; - struct i2c_client *client; int r; @@ -430,9 +429,6 @@ static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) return IRQ_NONE; } - client = phy->i2c_dev; - dev_dbg(&client->dev, "IRQ\n"); - if (phy->hard_fault != 0) return IRQ_HANDLED; diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index c8bdf078d111..a43fc4117fa5 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -257,8 +257,6 @@ static void st21nfca_se_wt_timeout(struct timer_list *t) struct st21nfca_hci_info *info = from_timer(info, t, se_info.bwi_timer); - pr_debug("\n"); - info->se_info.bwi_active = false; if (!info->se_info.xch_error) { @@ -278,8 +276,6 @@ static void st21nfca_se_activation_timeout(struct timer_list *t) struct st21nfca_hci_info *info = from_timer(info, t, se_info.se_active_timer); - pr_debug("\n"); - info->se_info.se_active = false; complete(&info->se_info.req_completion); From f0563ebec68feefa741e6d76bbc8999563444c97 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:34 +0200 Subject: [PATCH 103/440] nfc: trf7970a: drop unneeded debug prints ftrace is a preferred and standard way to debug entering and exiting functions so drop useless debug prints. Signed-off-by: Krzysztof Kozlowski Acked-by: Mark Greer Signed-off-by: Jakub Kicinski --- drivers/nfc/trf7970a.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 8890fcd59c39..29ca9c328df2 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -2170,8 +2170,6 @@ static int trf7970a_suspend(struct device *dev) struct spi_device *spi = to_spi_device(dev); struct trf7970a *trf = spi_get_drvdata(spi); - dev_dbg(dev, "Suspend\n"); - mutex_lock(&trf->lock); trf7970a_shutdown(trf); @@ -2187,8 +2185,6 @@ static int trf7970a_resume(struct device *dev) struct trf7970a *trf = spi_get_drvdata(spi); int ret; - dev_dbg(dev, "Resume\n"); - mutex_lock(&trf->lock); ret = trf7970a_startup(trf); @@ -2206,8 +2202,6 @@ static int trf7970a_pm_runtime_suspend(struct device *dev) struct trf7970a *trf = spi_get_drvdata(spi); int ret; - dev_dbg(dev, "Runtime suspend\n"); - mutex_lock(&trf->lock); ret = trf7970a_power_down(trf); @@ -2223,8 +2217,6 @@ static int trf7970a_pm_runtime_resume(struct device *dev) struct trf7970a *trf = spi_get_drvdata(spi); int ret; - dev_dbg(dev, "Runtime resume\n"); - ret = trf7970a_power_up(trf); if (!ret) pm_runtime_mark_last_busy(dev); From f41e137abd2546394b921168c11dfca563e114d1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Oct 2021 15:38:35 +0200 Subject: [PATCH 104/440] nfc: microread: drop unneeded debug prints ftrace is a preferred and standard way to debug entering and exiting functions so drop useless debug prints. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/microread/i2c.c | 4 ---- drivers/nfc/microread/mei.c | 2 -- 2 files changed, 6 deletions(-) diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index 86f593c73ed6..067295124eb9 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -237,8 +237,6 @@ static int microread_i2c_probe(struct i2c_client *client, struct microread_i2c_phy *phy; int r; - dev_dbg(&client->dev, "client %p\n", client); - phy = devm_kzalloc(&client->dev, sizeof(struct microread_i2c_phy), GFP_KERNEL); if (!phy) @@ -262,8 +260,6 @@ static int microread_i2c_probe(struct i2c_client *client, if (r < 0) goto err_irq; - nfc_info(&client->dev, "Probed\n"); - return 0; err_irq: diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c index 00689e18dc46..e2a77a5fc887 100644 --- a/drivers/nfc/microread/mei.c +++ b/drivers/nfc/microread/mei.c @@ -23,8 +23,6 @@ static int microread_mei_probe(struct mei_cl_device *cldev, struct nfc_mei_phy *phy; int r; - pr_info("Probing NFC microread\n"); - phy = nfc_mei_phy_alloc(cldev); if (!phy) return -ENOMEM; From a34dda7284304046adaca2685777e86927c58317 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:13 +0300 Subject: [PATCH 105/440] mlxsw: spectrum_qdisc: Pass extack to mlxsw_sp_qevent_entry_configure() This function will report a new failure in the following patches. Pass extack so that the failure is explicable. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 9958d503bf0e..14b87d672a9d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1583,9 +1583,11 @@ static void mlxsw_sp_qevent_trap_deconfigure(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_qevent_span_deconfigure(mlxsw_sp, qevent_binding, mall_entry->trap.span_id); } -static int mlxsw_sp_qevent_entry_configure(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_mall_entry *mall_entry, - struct mlxsw_sp_qevent_binding *qevent_binding) +static int +mlxsw_sp_qevent_entry_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mall_entry *mall_entry, + struct mlxsw_sp_qevent_binding *qevent_binding, + struct netlink_ext_ack *extack) { switch (mall_entry->type) { case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: @@ -1614,15 +1616,17 @@ static void mlxsw_sp_qevent_entry_deconfigure(struct mlxsw_sp *mlxsw_sp, } } -static int mlxsw_sp_qevent_binding_configure(struct mlxsw_sp_qevent_block *qevent_block, - struct mlxsw_sp_qevent_binding *qevent_binding) +static int +mlxsw_sp_qevent_binding_configure(struct mlxsw_sp_qevent_block *qevent_block, + struct mlxsw_sp_qevent_binding *qevent_binding, + struct netlink_ext_ack *extack) { struct mlxsw_sp_mall_entry *mall_entry; int err; list_for_each_entry(mall_entry, &qevent_block->mall_entry_list, list) { err = mlxsw_sp_qevent_entry_configure(qevent_block->mlxsw_sp, mall_entry, - qevent_binding); + qevent_binding, extack); if (err) goto err_entry_configure; } @@ -1646,13 +1650,17 @@ static void mlxsw_sp_qevent_binding_deconfigure(struct mlxsw_sp_qevent_block *qe qevent_binding); } -static int mlxsw_sp_qevent_block_configure(struct mlxsw_sp_qevent_block *qevent_block) +static int +mlxsw_sp_qevent_block_configure(struct mlxsw_sp_qevent_block *qevent_block, + struct netlink_ext_ack *extack) { struct mlxsw_sp_qevent_binding *qevent_binding; int err; list_for_each_entry(qevent_binding, &qevent_block->binding_list, list) { - err = mlxsw_sp_qevent_binding_configure(qevent_block, qevent_binding); + err = mlxsw_sp_qevent_binding_configure(qevent_block, + qevent_binding, + extack); if (err) goto err_binding_configure; } @@ -1737,7 +1745,7 @@ static int mlxsw_sp_qevent_mall_replace(struct mlxsw_sp *mlxsw_sp, list_add_tail(&mall_entry->list, &qevent_block->mall_entry_list); - err = mlxsw_sp_qevent_block_configure(qevent_block); + err = mlxsw_sp_qevent_block_configure(qevent_block, f->common.extack); if (err) goto err_block_configure; @@ -1911,7 +1919,8 @@ static int mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_po goto err_binding_create; } - err = mlxsw_sp_qevent_binding_configure(qevent_block, qevent_binding); + err = mlxsw_sp_qevent_binding_configure(qevent_block, qevent_binding, + f->extack); if (err) goto err_binding_configure; From 0908e42ad9a5965eb89028f286c1056bf56978b7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:14 +0300 Subject: [PATCH 106/440] mlxsw: spectrum_qdisc: Distinguish between ingress and egress triggers The following patches will configure the MLXSW_SP_SPAN_TRIGGER_ECN mirroring trigger. This trigger is considered "egress", unlike the previously-offloaded _EARLY_DROP. Add a helper to spectrum_span, mlxsw_sp_span_trigger_is_ingress(), to classify triggers to ingress and egress. Pass result of this instead of hardcoding true when calling mlxsw_sp_span_analyzed_port_get()/_put(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 23 ++++++++++++------- .../ethernet/mellanox/mlxsw/spectrum_span.c | 16 +++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_span.h | 1 + 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 14b87d672a9d..3e3da5b909f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1482,8 +1482,10 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_span_agent_parms *agent_parms, int *p_span_id) { + enum mlxsw_sp_span_trigger span_trigger = qevent_binding->span_trigger; struct mlxsw_sp_port *mlxsw_sp_port = qevent_binding->mlxsw_sp_port; struct mlxsw_sp_span_trigger_parms trigger_parms = {}; + bool ingress; int span_id; int err; @@ -1491,18 +1493,19 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp, if (err) return err; - err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, true); + ingress = mlxsw_sp_span_trigger_is_ingress(span_trigger); + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress); if (err) goto err_analyzed_port_get; trigger_parms.span_id = span_id; trigger_parms.probability_rate = 1; - err = mlxsw_sp_span_agent_bind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port, + err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port, &trigger_parms); if (err) goto err_agent_bind; - err = mlxsw_sp_span_trigger_enable(mlxsw_sp_port, qevent_binding->span_trigger, + err = mlxsw_sp_span_trigger_enable(mlxsw_sp_port, span_trigger, qevent_binding->tclass_num); if (err) goto err_trigger_enable; @@ -1511,10 +1514,10 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp, return 0; err_trigger_enable: - mlxsw_sp_span_agent_unbind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port, + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, &trigger_parms); err_agent_bind: - mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); err_analyzed_port_get: mlxsw_sp_span_agent_put(mlxsw_sp, span_id); return err; @@ -1524,16 +1527,20 @@ static void mlxsw_sp_qevent_span_deconfigure(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_qevent_binding *qevent_binding, int span_id) { + enum mlxsw_sp_span_trigger span_trigger = qevent_binding->span_trigger; struct mlxsw_sp_port *mlxsw_sp_port = qevent_binding->mlxsw_sp_port; struct mlxsw_sp_span_trigger_parms trigger_parms = { .span_id = span_id, }; + bool ingress; - mlxsw_sp_span_trigger_disable(mlxsw_sp_port, qevent_binding->span_trigger, + ingress = mlxsw_sp_span_trigger_is_ingress(span_trigger); + + mlxsw_sp_span_trigger_disable(mlxsw_sp_port, span_trigger, qevent_binding->tclass_num); - mlxsw_sp_span_agent_unbind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port, + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, &trigger_parms); - mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); mlxsw_sp_span_agent_put(mlxsw_sp, span_id); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 3398cc01e5ec..f5f819aa9a65 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -1650,6 +1650,22 @@ void mlxsw_sp_span_trigger_disable(struct mlxsw_sp_port *mlxsw_sp_port, return trigger_entry->ops->disable(trigger_entry, mlxsw_sp_port, tc); } +bool mlxsw_sp_span_trigger_is_ingress(enum mlxsw_sp_span_trigger trigger) +{ + switch (trigger) { + case MLXSW_SP_SPAN_TRIGGER_INGRESS: + case MLXSW_SP_SPAN_TRIGGER_EARLY_DROP: + case MLXSW_SP_SPAN_TRIGGER_TAIL_DROP: + return true; + case MLXSW_SP_SPAN_TRIGGER_EGRESS: + case MLXSW_SP_SPAN_TRIGGER_ECN: + return false; + } + + WARN_ON_ONCE(1); + return false; +} + static int mlxsw_sp1_span_init(struct mlxsw_sp *mlxsw_sp) { size_t arr_size = ARRAY_SIZE(mlxsw_sp1_span_entry_ops_arr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index efaefd1ae863..82e711afb02b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -120,6 +120,7 @@ int mlxsw_sp_span_trigger_enable(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_sp_span_trigger trigger, u8 tc); void mlxsw_sp_span_trigger_disable(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_sp_span_trigger trigger, u8 tc); +bool mlxsw_sp_span_trigger_is_ingress(enum mlxsw_sp_span_trigger trigger); extern const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops; extern const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops; From 099bf89d6a35086eee868f8357fb9d84a895b3c6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:15 +0300 Subject: [PATCH 107/440] mlxsw: spectrum_qdisc: Track permissible actions per binding One block can be bound to several qevents. The qevent type that the block is bound to determines which actions make sense in a given context. In the particular case of mlxsw, trap cannot be offloaded on a RED mark qevent, because the trap contract specifies that the packet is dropped in the HW datapath, and the HW trigger that the action is offloaded to is always forwarding the packet (in addition to marking in). Therefore keep track of which actions are permissible at each binding block. When an attempt is made to bind a certain action at a binding point where it is not supported, bounce the request. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 3e3da5b909f5..2dfc9e38307d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1472,6 +1472,7 @@ struct mlxsw_sp_qevent_binding { u32 handle; int tclass_num; enum mlxsw_sp_span_trigger span_trigger; + unsigned int action_mask; }; static LIST_HEAD(mlxsw_sp_qevent_block_cb_list); @@ -1596,6 +1597,11 @@ mlxsw_sp_qevent_entry_configure(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_qevent_binding *qevent_binding, struct netlink_ext_ack *extack) { + if (!(BIT(mall_entry->type) & qevent_binding->action_mask)) { + NL_SET_ERR_MSG(extack, "Action not supported at this qevent"); + return -EOPNOTSUPP; + } + switch (mall_entry->type) { case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: return mlxsw_sp_qevent_mirror_configure(mlxsw_sp, mall_entry, qevent_binding); @@ -1840,7 +1846,8 @@ static void mlxsw_sp_qevent_block_release(void *cb_priv) static struct mlxsw_sp_qevent_binding * mlxsw_sp_qevent_binding_create(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, int tclass_num, - enum mlxsw_sp_span_trigger span_trigger) + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) { struct mlxsw_sp_qevent_binding *binding; @@ -1852,6 +1859,7 @@ mlxsw_sp_qevent_binding_create(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, binding->handle = handle; binding->tclass_num = tclass_num; binding->span_trigger = span_trigger; + binding->action_mask = action_mask; return binding; } @@ -1877,9 +1885,11 @@ mlxsw_sp_qevent_binding_lookup(struct mlxsw_sp_qevent_block *block, return NULL; } -static int mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port, - struct flow_block_offload *f, - enum mlxsw_sp_span_trigger span_trigger) +static int +mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_qevent_binding *qevent_binding; @@ -1919,8 +1929,11 @@ static int mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_po goto err_binding_exists; } - qevent_binding = mlxsw_sp_qevent_binding_create(mlxsw_sp_port, f->sch->handle, - qdisc->tclass_num, span_trigger); + qevent_binding = mlxsw_sp_qevent_binding_create(mlxsw_sp_port, + f->sch->handle, + qdisc->tclass_num, + span_trigger, + action_mask); if (IS_ERR(qevent_binding)) { err = PTR_ERR(qevent_binding); goto err_binding_create; @@ -1979,15 +1992,19 @@ static void mlxsw_sp_setup_tc_block_qevent_unbind(struct mlxsw_sp_port *mlxsw_sp } } -static int mlxsw_sp_setup_tc_block_qevent(struct mlxsw_sp_port *mlxsw_sp_port, - struct flow_block_offload *f, - enum mlxsw_sp_span_trigger span_trigger) +static int +mlxsw_sp_setup_tc_block_qevent(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f, + enum mlxsw_sp_span_trigger span_trigger, + unsigned int action_mask) { f->driver_block_list = &mlxsw_sp_qevent_block_cb_list; switch (f->command) { case FLOW_BLOCK_BIND: - return mlxsw_sp_setup_tc_block_qevent_bind(mlxsw_sp_port, f, span_trigger); + return mlxsw_sp_setup_tc_block_qevent_bind(mlxsw_sp_port, f, + span_trigger, + action_mask); case FLOW_BLOCK_UNBIND: mlxsw_sp_setup_tc_block_qevent_unbind(mlxsw_sp_port, f, span_trigger); return 0; @@ -1999,7 +2016,12 @@ static int mlxsw_sp_setup_tc_block_qevent(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_offload *f) { - return mlxsw_sp_setup_tc_block_qevent(mlxsw_sp_port, f, MLXSW_SP_SPAN_TRIGGER_EARLY_DROP); + unsigned int action_mask = BIT(MLXSW_SP_MALL_ACTION_TYPE_MIRROR) | + BIT(MLXSW_SP_MALL_ACTION_TYPE_TRAP); + + return mlxsw_sp_setup_tc_block_qevent(mlxsw_sp_port, f, + MLXSW_SP_SPAN_TRIGGER_EARLY_DROP, + action_mask); } int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) From 9c18eaf2882d96f877bbfea795a9f5cdf63ac2ce Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:16 +0300 Subject: [PATCH 108/440] mlxsw: spectrum_qdisc: Offload RED qevent mark The RED "mark" qevent can be offloaded under similar conditions as the RED "early_drop" qevent. Therefore recognize its binding type in the TC_SETUP_BLOCK handler and translate to the right SPAN trigger, with the right set of supported actions. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 10 ++++++++++ 3 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d366786ce0f8..3c9844f2aa1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1035,6 +1035,8 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_setup_tc_block_clsact(mlxsw_sp_port, f, false); case FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP: return mlxsw_sp_setup_tc_block_qevent_early_drop(mlxsw_sp_port, f); + case FLOW_BLOCK_BINDER_TYPE_RED_MARK: + return mlxsw_sp_setup_tc_block_qevent_mark(mlxsw_sp_port, f); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0ebbd9b04b89..aae1aed5345b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -1195,6 +1195,8 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_fifo_qopt_offload *p); int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_offload *f); +int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 2dfc9e38307d..e367c63e72bf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -2024,6 +2024,16 @@ int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_por action_mask); } +int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, + struct flow_block_offload *f) +{ + unsigned int action_mask = BIT(MLXSW_SP_MALL_ACTION_TYPE_MIRROR); + + return mlxsw_sp_setup_tc_block_qevent(mlxsw_sp_port, f, + MLXSW_SP_SPAN_TRIGGER_ECN, + action_mask); +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc_state *qdisc_state; From a703b5179b5c0740c69358b2cfb22c12545d13a4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:17 +0300 Subject: [PATCH 109/440] selftests: mlxsw: sch_red_core: Drop two unused variables These variables are cut'n'pasted from other functions in the file and not actually used. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 33ddd01689be..88053f8dfd12 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -551,10 +551,8 @@ do_drop_test() local trigger=$1; shift local subtest=$1; shift local fetch_counter=$1; shift - local backlog local base local now - local pct RET=0 From 0cd6fa99a076b79744d26bb2f4d089e1c4eae2e4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 10 Oct 2021 14:40:18 +0300 Subject: [PATCH 110/440] selftests: mlxsw: RED: Add selftests for the mark qevent Add do_mark_test(), which is to do_ecn_test() like do_drop_test() is to do_red_test(): meant to test that actions on the RED mark qevent block are offloaded, and executed on ECN-marked packets. The test splits install_qdisc() into its constituents, install_root_qdisc() and install_qdisc_tcX(). This is in order to test that when mirroring is enabled on one TC, the other TC does not mirror. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/sch_red_core.sh | 74 +++++++++++++++++++ .../drivers/net/mlxsw/sch_red_ets.sh | 55 ++++++++++++-- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 88053f8dfd12..eea3e5ad3f38 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -544,6 +544,53 @@ do_mc_backlog_test() log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } +do_mark_test() +{ + local vlan=$1; shift + local limit=$1; shift + local subtest=$1; shift + local fetch_counter=$1; shift + local should_fail=$1; shift + local base + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + + # Create a bit of a backlog and observe no mirroring due to marks. + qevent_rule_install_$subtest + + build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null + + base=$($fetch_counter) + count=$(busywait 1100 until_counter_is ">= $((base + 1))" \ + $fetch_counter) + check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure" + + # Above limit, everything should be mirrored, we should see lots of + # packets. + build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null + busywait_for_counter 1100 +10000 \ + $fetch_counter > /dev/null + check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" + + # When the rule is uninstalled, there should be no mirroring. + qevent_rule_uninstall_$subtest + busywait_for_counter 1100 +10 \ + $fetch_counter > /dev/null + check_fail $? "Spurious packets observed after uninstall" + + if ((should_fail)); then + log_test "TC $((vlan - 10)): marked packets not $subtest'd" + else + log_test "TC $((vlan - 10)): marked packets $subtest'd" + fi + + stop_traffic + sleep 1 +} + do_drop_test() { local vlan=$1; shift @@ -626,6 +673,22 @@ do_drop_mirror_test() tc filter del dev $h2 ingress pref 1 handle 101 flower } +do_mark_mirror_test() +{ + local vlan=$1; shift + local limit=$1; shift + + tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ + flower skip_sw ip_proto tcp \ + action drop + + do_mark_test "$vlan" "$limit" mirror \ + qevent_counter_fetch_mirror \ + $(: should_fail=)0 + + tc filter del dev $h2 ingress pref 1 handle 101 flower +} + qevent_rule_install_trap() { tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ @@ -653,3 +716,14 @@ do_drop_trap_test() do_drop_test "$vlan" "$limit" "$trap_name" trap \ "qevent_counter_fetch_trap $trap_name" } + +qevent_rule_install_trap_fwd() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action trap_fwd hw_stats disabled +} + +qevent_rule_uninstall_trap_fwd() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index f3ef3274f9b3..b58b4cf9dc13 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -9,6 +9,7 @@ ALL_TESTS=" mc_backlog_test red_mirror_test red_trap_test + ecn_mirror_test " : ${QDISC:=ets} source sch_red_core.sh @@ -21,26 +22,58 @@ source sch_red_core.sh BACKLOG1=200000 BACKLOG2=500000 -install_qdisc() +install_root_qdisc() +{ + tc qdisc add dev $swp3 root handle 10: $QDISC \ + bands 8 priomap 7 6 5 4 3 2 1 0 +} + +install_qdisc_tc0() { local -a args=("$@") - tc qdisc add dev $swp3 root handle 10: $QDISC \ - bands 8 priomap 7 6 5 4 3 2 1 0 tc qdisc add dev $swp3 parent 10:8 handle 108: red \ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ probability 1.0 avpkt 8000 burst 38 "${args[@]}" +} + +install_qdisc_tc1() +{ + local -a args=("$@") + tc qdisc add dev $swp3 parent 10:7 handle 107: red \ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ probability 1.0 avpkt 8000 burst 63 "${args[@]}" +} + +install_qdisc() +{ + install_root_qdisc + install_qdisc_tc0 "$@" + install_qdisc_tc1 "$@" sleep 1 } +uninstall_qdisc_tc0() +{ + tc qdisc del dev $swp3 parent 10:8 +} + +uninstall_qdisc_tc1() +{ + tc qdisc del dev $swp3 parent 10:7 +} + +uninstall_root_qdisc() +{ + tc qdisc del dev $swp3 root +} + uninstall_qdisc() { - tc qdisc del dev $swp3 parent 10:7 - tc qdisc del dev $swp3 parent 10:8 - tc qdisc del dev $swp3 root + uninstall_qdisc_tc0 + uninstall_qdisc_tc1 + uninstall_root_qdisc } ecn_test() @@ -112,6 +145,16 @@ red_trap_test() uninstall_qdisc } +ecn_mirror_test() +{ + install_qdisc ecn qevent mark block 10 + + do_mark_mirror_test 10 $BACKLOG1 + do_mark_mirror_test 11 $BACKLOG2 + + uninstall_qdisc +} + trap cleanup EXIT setup_prepare From 7bb39a394490c0690dc7304f2f0345539d590d2b Mon Sep 17 00:00:00 2001 From: Len Baker Date: Mon, 11 Oct 2021 11:01:00 +0200 Subject: [PATCH 111/440] net: hns: Prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, take the opportunity to refactor the hnae_handle structure to switch the last member to flexible array, changing the code accordingly. Also, fix the comment in the hnae_vf_cb structure to inform that the ae_handle member must be the last member. Then, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. This code was detected with the help of Coccinelle and audited and fixed manually. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 5 ++--- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 2b7db1c22321..d46e8f999019 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -558,7 +558,7 @@ struct hnae_handle { enum hnae_media_type media_type; struct list_head node; /* list to hnae_ae_dev->handle_list */ struct hnae_buf_ops *bops; /* operation for the buffer */ - struct hnae_queue **qs; /* array base of all queues */ + struct hnae_queue *qs[]; /* flexible array of all queues */ }; #define ring_to_dev(ring) ((ring)->q->dev->dev) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 75e4ec569da8..e81116ad9bdf 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -81,8 +81,8 @@ static struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, vfnum_per_port = hns_ae_get_vf_num_per_port(dsaf_dev, port_id); qnum_per_vf = hns_ae_get_q_num_per_vf(dsaf_dev, port_id); - vf_cb = kzalloc(sizeof(*vf_cb) + - qnum_per_vf * sizeof(struct hnae_queue *), GFP_KERNEL); + vf_cb = kzalloc(struct_size(vf_cb, ae_handle.qs, qnum_per_vf), + GFP_KERNEL); if (unlikely(!vf_cb)) { dev_err(dsaf_dev->dev, "malloc vf_cb fail!\n"); ae_handle = ERR_PTR(-ENOMEM); @@ -108,7 +108,6 @@ static struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, goto vf_id_err; } - ae_handle->qs = (struct hnae_queue **)(&ae_handle->qs + 1); for (i = 0; i < qnum_per_vf; i++) { ae_handle->qs[i] = &ring_pair_cb->q; ae_handle->qs[i]->rx_ring.q = ae_handle->qs[i]; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index cba04bfa0b3f..5526a10caac5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -210,7 +210,7 @@ struct hnae_vf_cb { u8 port_index; struct hns_mac_cb *mac_cb; struct dsaf_device *dsaf_dev; - struct hnae_handle ae_handle; /* must be the last number */ + struct hnae_handle ae_handle; /* must be the last member */ }; struct dsaf_int_xge_src { From e4400bbf5b15750e1b59bf4722d18d99be60c69f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Oct 2021 14:12:35 +0200 Subject: [PATCH 112/440] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE The NTF_EXT_LEARNED neigh flag is usually propagated back to user space upon dump of the neighbor table. However, when used in combination with NTF_USE flag this is not the case despite exempting the entry from the garbage collector. This results in inconsistent state since entries are typically marked in neigh->flags with NTF_EXT_LEARNED, but here they are not. Fix it by propagating the creation flag to ___neigh_create(). Before fix: # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE [...] After fix: # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE [...] Fixes: 9ce33e46531d ("neighbour: support for NTF_EXT_LEARNED flag") Signed-off-by: Daniel Borkmann Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/neighbour.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2d5bc3a75fae..8457d5f97022 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -379,7 +379,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - bool exempt_from_gc) + u8 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -412,6 +412,7 @@ do_alloc: n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + n->flags = flags; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); @@ -575,19 +576,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -static struct neighbour *___neigh_create(struct neigh_table *tbl, - const void *pkey, - struct net_device *dev, - bool exempt_from_gc, bool want_ref) +static struct neighbour * +___neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, u8 flags, + bool exempt_from_gc, bool want_ref) { - struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); - u32 hash_val; - unsigned int key_len = tbl->key_len; - int error; + u32 hash_val, key_len = tbl->key_len; + struct neighbour *n1, *rc, *n; struct neigh_hash_table *nht; + int error; + n = neigh_alloc(tbl, dev, flags, exempt_from_gc); trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); - if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; @@ -674,7 +674,7 @@ out_neigh_release: struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { - return ___neigh_create(tbl, pkey, dev, false, want_ref); + return ___neigh_create(tbl, pkey, dev, 0, false, want_ref); } EXPORT_SYMBOL(__neigh_create); @@ -1942,7 +1942,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm->ndm_flags & NTF_EXT_LEARNED; - neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); + neigh = ___neigh_create(tbl, dst, dev, + ndm->ndm_flags & NTF_EXT_LEARNED, + exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; From 3dc20f4762c62d3b3f0940644881ed818aa7b2f5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Oct 2021 14:12:36 +0200 Subject: [PATCH 113/440] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Currently, it is not possible to migrate a neighbor entry between NUD_PERMANENT state and NTF_USE flag with a dynamic NUD state from a user space control plane. Similarly, it is not possible to add/remove NTF_EXT_LEARNED flag from an existing neighbor entry in combination with NTF_USE flag. This is due to the latter directly calling into neigh_event_send() without any meta data updates as happening in __neigh_update(). Thus, to enable this use case, extend the latter with a NEIGH_UPDATE_F_USE flag where we break the NUD_PERMANENT state in particular so that a latter neigh_event_send() is able to re-resolve a neighbor entry. Before fix, NUD_PERMANENT -> NUD_* & NTF_USE: # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT [...] As can be seen, despite the admin-triggered replace, the entry remains in the NUD_PERMANENT state. After fix, NUD_PERMANENT -> NUD_* & NTF_USE: # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE [...] # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn STALE [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT [...] After the fix, the admin-triggered replace switches to a dynamic state from the NTF_USE flag which triggered a new neighbor resolution. Likewise, we can transition back from there, if needed, into NUD_PERMANENT. Similar before/after behavior can be observed for below transitions: Before fix, NTF_USE -> NTF_USE | NTF_EXT_LEARNED -> NTF_USE: # ./ip/ip n replace 192.168.178.30 dev enp5s0 use # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE [...] After fix, NTF_USE -> NTF_USE | NTF_EXT_LEARNED -> NTF_USE: # ./ip/ip n replace 192.168.178.30 dev enp5s0 use # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE [...] # ./ip/ip n replace 192.168.178.30 dev enp5s0 use # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE [..] Signed-off-by: Daniel Borkmann Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + net/core/neighbour.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 22ced1381ede..eb2a7c03a5b0 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_USE 0x10000000 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8457d5f97022..3e58037a8ae6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1217,7 +1217,7 @@ static void neigh_update_hhs(struct neighbour *neigh) lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. - + NEIGH_UPDATE_F_USE means that the entry is user triggered. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1255,6 +1255,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); + if (flags & NEIGH_UPDATE_F_USE) { + new = old & ~NUD_PERMANENT; + neigh->nud_state = new; + err = 0; + goto out; + } if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1963,22 +1969,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - if (ndm->ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; + if (ndm->ndm_flags & NTF_USE) + flags |= NEIGH_UPDATE_F_USE; - if (ndm->ndm_flags & NTF_USE) { + err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid, extack); + if (!err && ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; - } else - err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, - NETLINK_CB(skb).portid, extack); - + } neigh_release(neigh); - out: return err; } From 2c611ad97a82b51221bb0920cc6cac0b1d4c0e52 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 11 Oct 2021 14:12:37 +0200 Subject: [PATCH 114/440] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Currently, all bits in struct ndmsg's ndm_flags are used up with the most recent addition of 435f2e7cc0b7 ("net: bridge: add support for sticky fdb entries"). This makes it impossible to extend the neighboring subsystem with new NTF_* flags: struct ndmsg { __u8 ndm_family; __u8 ndm_pad1; __u16 ndm_pad2; __s32 ndm_ifindex; __u16 ndm_state; __u8 ndm_flags; __u8 ndm_type; }; There are ndm_pad{1,2} attributes which are not used. However, due to uncareful design, the kernel does not enforce them to be zero upon new neighbor entry addition, and given they've been around forever, it is not possible to reuse them today due to risk of breakage. One option to overcome this limitation is to add a new NDA_FLAGS_EXT attribute for extended flags. In struct neighbour, there is a 3 byte hole between protocol and ha_lock, which allows neigh->flags to be extended from 8 to 32 bits while still being on the same cacheline as before. This also allows for all future NTF_* flags being in neigh->flags rather than yet another flags field. Unknown flags in NDA_FLAGS_EXT will be rejected by the kernel. Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/neighbour.h | 14 +++++---- include/uapi/linux/neighbour.h | 1 + net/core/neighbour.c | 55 ++++++++++++++++++++++++---------- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index eb2a7c03a5b0..26d4ada0aea9 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -144,11 +144,11 @@ struct neighbour { struct timer_list timer; unsigned long used; atomic_t probes; - __u8 flags; - __u8 nud_state; - __u8 type; - __u8 dead; + u8 nud_state; + u8 type; + u8 dead; u8 protocol; + u32 flags; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; @@ -172,7 +172,7 @@ struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; - u8 flags; + u32 flags; u8 protocol; u8 key[]; }; @@ -258,6 +258,10 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 +/* In-kernel representation for NDA_FLAGS_EXT flags: */ +#define NTF_OLD_MASK 0xff +#define NTF_EXT_SHIFT 8 + extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 00a60695fa53..a80cca141855 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -31,6 +31,7 @@ enum { NDA_PROTOCOL, /* Originator of entry */ NDA_NH_ID, NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, __NDA_MAX }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3e58037a8ae6..5245e888c981 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -159,7 +159,7 @@ static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, int *notify) { bool rc = false; - u8 ndm_flags; + u32 ndm_flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) return rc; @@ -379,7 +379,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - u8 flags, bool exempt_from_gc) + u32 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -578,7 +578,7 @@ EXPORT_SYMBOL(neigh_lookup_nodev); static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev, u8 flags, + struct net_device *dev, u32 flags, bool exempt_from_gc, bool want_ref) { u32 hash_val, key_len = tbl->key_len; @@ -1789,6 +1789,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_FLAGS_EXT] = { .type = NLA_U32 }, [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; @@ -1861,7 +1862,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + NEIGH_UPDATE_F_OVERRIDE_ISROUTER; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; @@ -1870,6 +1871,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct neighbour *neigh; void *dst, *lladdr; u8 protocol = 0; + u32 ndm_flags; int err; ASSERT_RTNL(); @@ -1885,6 +1887,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, } ndm = nlmsg_data(nlh); + ndm_flags = ndm->ndm_flags; + if (tb[NDA_FLAGS_EXT]) { + u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); + + if (ext & ~0) { + NL_SET_ERR_MSG(extack, "Invalid extended flags"); + goto out; + } + ndm_flags |= (ext << NTF_EXT_SHIFT); + } if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { @@ -1912,14 +1924,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[NDA_PROTOCOL]) protocol = nla_get_u8(tb[NDA_PROTOCOL]); - - if (ndm->ndm_flags & NTF_PROXY) { + if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { - pn->flags = ndm->ndm_flags; + pn->flags = ndm_flags; if (protocol) pn->protocol = protocol; err = 0; @@ -1947,9 +1958,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, } exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || - ndm->ndm_flags & NTF_EXT_LEARNED; + ndm_flags & NTF_EXT_LEARNED; neigh = ___neigh_create(tbl, dst, dev, - ndm->ndm_flags & NTF_EXT_LEARNED, + ndm_flags & NTF_EXT_LEARNED, exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); @@ -1969,16 +1980,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - if (ndm->ndm_flags & NTF_EXT_LEARNED) + if (ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - if (ndm->ndm_flags & NTF_ROUTER) + if (ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; - if (ndm->ndm_flags & NTF_USE) + if (ndm_flags & NTF_USE) flags |= NEIGH_UPDATE_F_USE; err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (!err && ndm->ndm_flags & NTF_USE) { + if (!err && ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; } @@ -2433,6 +2444,7 @@ out: static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, u32 pid, u32 seq, int type, unsigned int flags) { + u32 neigh_flags, neigh_flags_ext; unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; @@ -2442,11 +2454,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (nlh == NULL) return -EMSGSIZE; + neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT; + neigh_flags = neigh->flags & NTF_OLD_MASK; + ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = neigh->flags; + ndm->ndm_flags = neigh_flags; ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; @@ -2477,6 +2492,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) goto nla_put_failure; + if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2490,6 +2507,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, u32 pid, u32 seq, int type, unsigned int flags, struct neigh_table *tbl) { + u32 neigh_flags, neigh_flags_ext; struct nlmsghdr *nlh; struct ndmsg *ndm; @@ -2497,11 +2515,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nlh == NULL) return -EMSGSIZE; + neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT; + neigh_flags = pn->flags & NTF_OLD_MASK; + ndm = nlmsg_data(nlh); ndm->ndm_family = tbl->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = pn->flags | NTF_PROXY; + ndm->ndm_flags = neigh_flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; @@ -2511,6 +2532,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) goto nla_put_failure; + if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2826,6 +2849,7 @@ static inline size_t neigh_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(4) /* NDA_PROBES */ + + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } @@ -2854,6 +2878,7 @@ static inline size_t pneigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } From 7482e3841d520a368426ac196720601687e2dc47 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Oct 2021 14:12:38 +0200 Subject: [PATCH 115/440] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED flag. The flag then indicates to the kernel that the neighbor entry should be periodically probed for keeping the entry in NUD_REACHABLE state iff possible. The use case for this is targeting XDP or tc BPF load-balancers which use the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution for their backends. Given they cannot be resolved in fast-path, a control plane inserts the L3 (without L2) entries manually into the neighbor table and lets the kernel do the neighbor resolution either on the gateway or on the backend directly in case the latter resides in the same L2. This avoids to deal with L2 in the control plane and to rebuild what the kernel already does best anyway. NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor table which gets triggered by the system work queue to periodically call neigh_event_send() for performing the resolution. The implementation allows migration from/to NTF_MANAGED neighbor entries, so that already existing entries can be converted by the control plane if needed. Potentially, we could make the interval for periodically calling neigh_event_send() configurable; right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table"). In future, the latter could possibly reuse the NTF_MANAGED neighbors as well. Example: # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE [...] Signed-off-by: Daniel Borkmann Acked-by: Roopa Prabhu Link: https://linuxplumbersconf.org/event/11/contributions/953/ Signed-off-by: David S. Miller --- include/net/neighbour.h | 21 ++++-- include/uapi/linux/neighbour.h | 34 ++++++---- net/core/neighbour.c | 115 ++++++++++++++++++++++++--------- 3 files changed, 121 insertions(+), 49 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 26d4ada0aea9..e8e48be66755 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -155,6 +155,7 @@ struct neighbour { int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; struct list_head gc_list; + struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; @@ -216,11 +217,13 @@ struct neigh_table { int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; + struct delayed_work managed_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; atomic_t gc_entries; struct list_head gc_list; + struct list_head managed_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; @@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n) } /* flags for neigh_update() */ -#define NEIGH_UPDATE_F_OVERRIDE 0x00000001 -#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 -#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 -#define NEIGH_UPDATE_F_USE 0x10000000 -#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 -#define NEIGH_UPDATE_F_ISROUTER 0x40000000 -#define NEIGH_UPDATE_F_ADMIN 0x80000000 +#define NEIGH_UPDATE_F_OVERRIDE BIT(0) +#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1) +#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2) +#define NEIGH_UPDATE_F_USE BIT(3) +#define NEIGH_UPDATE_F_MANAGED BIT(4) +#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5) +#define NEIGH_UPDATE_F_ISROUTER BIT(6) +#define NEIGH_UPDATE_F_ADMIN BIT(7) /* In-kernel representation for NDA_FLAGS_EXT flags: */ #define NTF_OLD_MASK 0xff #define NTF_EXT_SHIFT 8 +#define NTF_EXT_MASK (NTF_EXT_MANAGED) + +#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT) extern const struct nla_policy nda_policy[]; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index a80cca141855..db05fb55055e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -41,14 +41,16 @@ enum { * Neighbor Cache Entry Flags */ -#define NTF_USE 0x01 -#define NTF_SELF 0x02 -#define NTF_MASTER 0x04 -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_EXT_LEARNED 0x10 -#define NTF_OFFLOADED 0x20 -#define NTF_STICKY 0x40 -#define NTF_ROUTER 0x80 +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) /* * Neighbor Cache Entry States. @@ -66,12 +68,22 @@ enum { #define NUD_PERMANENT 0x80 #define NUD_NONE 0x00 -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - * and make no address resolution or NUD. - * NUD_PERMANENT also cannot be deleted by garbage collectors. +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry * states don't make sense and thus are ignored. Such entries don't age and * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. */ struct nda_cacheinfo { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5245e888c981..eae73efa9245 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n) list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } + if (!list_empty(&n->managed_list)) + list_del_init(&n->managed_list); } static void neigh_update_gc_list(struct neighbour *n) @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n) write_lock_bh(&n->tbl->lock); write_lock(&n->lock); - if (n->dead) goto out; @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n) list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); } - out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, - int *notify) +static void neigh_update_managed_list(struct neighbour *n) { - bool rc = false; - u32 ndm_flags; + bool on_managed_list, add_to_managed; + + write_lock_bh(&n->tbl->lock); + write_lock(&n->lock); + if (n->dead) + goto out; + + add_to_managed = n->flags & NTF_MANAGED; + on_managed_list = !list_empty(&n->managed_list); + + if (!add_to_managed && on_managed_list) + list_del_init(&n->managed_list); + else if (add_to_managed && !on_managed_list) + list_add_tail(&n->managed_list, &n->tbl->managed_list); +out: + write_unlock(&n->lock); + write_unlock_bh(&n->tbl->lock); +} + +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, + bool *gc_update, bool *managed_update) +{ + u32 ndm_flags, old_flags = neigh->flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return rc; + return; - ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; + ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0; + + if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; - rc = true; *notify = 1; + *gc_update = true; + } + if ((old_flags ^ ndm_flags) & NTF_MANAGED) { + if (ndm_flags & NTF_MANAGED) + neigh->flags |= NTF_MANAGED; + else + neigh->flags &= ~NTF_MANAGED; + *notify = 1; + *managed_update = true; } - - return rc; } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, @@ -422,6 +450,7 @@ do_alloc: refcount_set(&n->refcnt, 1); n->dead = 1; INIT_LIST_HEAD(&n->gc_list); + INIT_LIST_HEAD(&n->managed_list); atomic_inc(&tbl->entries); out: @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, n->dead = 0; if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); - + if (n->flags & NTF_MANAGED) + list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); rcu_assign_pointer(n->next, @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh) } } - - /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh) if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. NEIGH_UPDATE_F_USE means that the entry is user triggered. + NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh) Caller MUST hold reference count on the entry. */ - static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { - bool ext_learn_change = false; - u8 old; - int err; - int notify = 0; - struct net_device *dev; + bool gc_update = false, managed_update = false; int update_isrouter = 0; + struct net_device *dev; + int err, notify = 0; + u8 old; trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, (old & (NUD_NOARP | NUD_PERMANENT))) goto out; - ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); - if (flags & NEIGH_UPDATE_F_USE) { + neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); + if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; neigh->nud_state = new; err = 0; @@ -1405,15 +1432,13 @@ out: if (update_isrouter) neigh_update_is_router(neigh, flags, ¬ify); write_unlock_bh(&neigh->lock); - - if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) + if (((new ^ old) & NUD_PERMANENT) || gc_update) neigh_update_gc_list(neigh); - + if (managed_update) + neigh_update_managed_list(neigh); if (notify) neigh_update_notify(neigh, nlmsg_pid); - trace_neigh_update_done(neigh, err); - return err; } @@ -1539,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) } EXPORT_SYMBOL(neigh_direct_output); +static void neigh_managed_work(struct work_struct *work) +{ + struct neigh_table *tbl = container_of(work, struct neigh_table, + managed_work.work); + struct neighbour *neigh; + + write_lock_bh(&tbl->lock); + list_for_each_entry(neigh, &tbl->managed_list, managed_list) + neigh_event_send(neigh, NULL); + queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, + NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); + write_unlock_bh(&tbl->lock); +} + static void neigh_proxy_process(struct timer_list *t) { struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); @@ -1685,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_LIST_HEAD(&tbl->parms_list); INIT_LIST_HEAD(&tbl->gc_list); + INIT_LIST_HEAD(&tbl->managed_list); + list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); @@ -1716,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl) WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); + INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); + INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work); + queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0); + timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1891,7 +1936,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[NDA_FLAGS_EXT]) { u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); - if (ext & ~0) { + if (ext & ~NTF_EXT_MASK) { NL_SET_ERR_MSG(extack, "Invalid extended flags"); goto out; } @@ -1927,6 +1972,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; + if (ndm_flags & NTF_MANAGED) { + NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination"); + goto out; + } + err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { @@ -1960,7 +2010,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm_flags & NTF_EXT_LEARNED; neigh = ___neigh_create(tbl, dst, dev, - ndm_flags & NTF_EXT_LEARNED, + ndm_flags & + (NTF_EXT_LEARNED | NTF_MANAGED), exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); @@ -1984,12 +2035,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, flags |= NEIGH_UPDATE_F_EXT_LEARNED; if (ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; + if (ndm_flags & NTF_MANAGED) + flags |= NEIGH_UPDATE_F_MANAGED; if (ndm_flags & NTF_USE) flags |= NEIGH_UPDATE_F_USE; err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (!err && ndm_flags & NTF_USE) { + if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) { neigh_event_send(neigh, NULL); err = 0; } From 25b90c19102f50ae261cbc328361c0fc66b7c901 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Oct 2021 06:09:09 -0700 Subject: [PATCH 116/440] tulip: fix setting device address from rom I missed removing i from the array index when converting from a loop to a direct copy. Fixes: ca8793175564 ("ethernet: tulip: remove direct netdev->dev_addr writes") Reported-by: Joe Perches Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- drivers/net/ethernet/dec/tulip/dmfe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 1e3c90c3c0ed..d51b3d24a0c8 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1823,7 +1823,7 @@ static void de21041_get_srom_info(struct de_private *de) #endif /* store MAC address */ - eth_hw_addr_set(de->dev, &ee_data[i + sa_offset]); + eth_hw_addr_set(de->dev, &ee_data[sa_offset]); /* get offset of controller 0 info leaf. ignore 2nd byte. */ ofs = ee_data[SROMC0InfoLeaf]; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 6e64ff20a378..83f1727d1423 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -476,7 +476,7 @@ static int dmfe_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Set Node address */ - eth_hw_addr_set(dev, &db->srom[20 + i]); + eth_hw_addr_set(dev, &db->srom[20]); err = register_netdev (dev); if (err) From 850bfb912a6deb3134c5922882f8d139f77d8c5c Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 11 Oct 2021 22:46:08 +0800 Subject: [PATCH 117/440] net: hns3: debugfs add support dumping page pool info Add a file node "page_pool_info" for debugfs, then cat this file node to dump page pool info as below: QUEUE_ID ALLOCATE_CNT FREE_CNT POOL_SIZE(PAGE_NUM) ORDER NUMA_ID MAX_LEN 0 512 0 512 0 2 4K 1 512 0 512 0 2 4K 2 512 0 512 0 2 4K 3 512 0 512 0 2 4K 4 512 0 512 0 2 4K Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + .../ethernet/hisilicon/hns3/hns3_debugfs.c | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 17872c50b63c..5d188573c433 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -298,6 +298,7 @@ enum hnae3_dbg_cmd { HNAE3_DBG_CMD_MAC_TNL_STATUS, HNAE3_DBG_CMD_SERV_INFO, HNAE3_DBG_CMD_UMV_INFO, + HNAE3_DBG_CMD_PAGE_POOL_INFO, HNAE3_DBG_CMD_UNKNOWN, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index a1555f074e06..b26d43c9c088 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -336,6 +336,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .buf_len = HNS3_DBG_READ_LEN, .init = hns3_dbg_common_file_init, }, + { + .name = "page_pool_info", + .cmd = HNAE3_DBG_CMD_PAGE_POOL_INFO, + .dentry = HNS3_DBG_DENTRY_COMMON, + .buf_len = HNS3_DBG_READ_LEN, + .init = hns3_dbg_common_file_init, + }, }; static struct hns3_dbg_cap_info hns3_dbg_cap[] = { @@ -941,6 +948,69 @@ static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len) return 0; } +static const struct hns3_dbg_item page_pool_info_items[] = { + { "QUEUE_ID", 2 }, + { "ALLOCATE_CNT", 2 }, + { "FREE_CNT", 6 }, + { "POOL_SIZE(PAGE_NUM)", 2 }, + { "ORDER", 2 }, + { "NUMA_ID", 2 }, + { "MAX_LEN", 2 }, +}; + +static void hns3_dump_page_pool_info(struct hns3_enet_ring *ring, + char **result, u32 index) +{ + u32 j = 0; + + sprintf(result[j++], "%u", index); + sprintf(result[j++], "%u", ring->page_pool->pages_state_hold_cnt); + sprintf(result[j++], "%u", + atomic_read(&ring->page_pool->pages_state_release_cnt)); + sprintf(result[j++], "%u", ring->page_pool->p.pool_size); + sprintf(result[j++], "%u", ring->page_pool->p.order); + sprintf(result[j++], "%d", ring->page_pool->p.nid); + sprintf(result[j++], "%uK", ring->page_pool->p.max_len / 1024); +} + +static int +hns3_dbg_page_pool_info(struct hnae3_handle *h, char *buf, int len) +{ + char data_str[ARRAY_SIZE(page_pool_info_items)][HNS3_DBG_DATA_STR_LEN]; + char *result[ARRAY_SIZE(page_pool_info_items)]; + struct hns3_nic_priv *priv = h->priv; + char content[HNS3_DBG_INFO_LEN]; + struct hns3_enet_ring *ring; + int pos = 0; + u32 i; + + if (!priv->ring) { + dev_err(&h->pdev->dev, "priv->ring is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(page_pool_info_items); i++) + result[i] = &data_str[i][0]; + + hns3_dbg_fill_content(content, sizeof(content), page_pool_info_items, + NULL, ARRAY_SIZE(page_pool_info_items)); + pos += scnprintf(buf + pos, len - pos, "%s", content); + for (i = 0; i < h->kinfo.num_tqps; i++) { + if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || + test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) + return -EPERM; + ring = &priv->ring[(u32)(i + h->kinfo.num_tqps)]; + hns3_dump_page_pool_info(ring, result, i); + hns3_dbg_fill_content(content, sizeof(content), + page_pool_info_items, + (const char **)result, + ARRAY_SIZE(page_pool_info_items)); + pos += scnprintf(buf + pos, len - pos, "%s", content); + } + + return 0; +} + static int hns3_dbg_get_cmd_index(struct hns3_dbg_data *dbg_data, u32 *index) { u32 i; @@ -982,6 +1052,10 @@ static const struct hns3_dbg_func hns3_dbg_cmd_func[] = { .cmd = HNAE3_DBG_CMD_TX_QUEUE_INFO, .dbg_dump = hns3_dbg_tx_queue_info, }, + { + .cmd = HNAE3_DBG_CMD_PAGE_POOL_INFO, + .dbg_dump = hns3_dbg_page_pool_info, + }, }; static int hns3_dbg_read_cmd(struct hns3_dbg_data *dbg_data, From 177c92353be935db555d0d08729e871145ec698c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Oct 2021 17:00:16 -0700 Subject: [PATCH 118/440] ethernet: tulip: avoid duplicate variable name on sparc I recently added a variable called addr to tulip_init_one() but for sparc there's already a variable called that half way thru the function. Rename it to fix build. Fixes: ca8793175564 ("ethernet: tulip: remove direct netdev->dev_addr writes") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/tulip_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index ec0ce8f1beea..79df5a72877b 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1605,7 +1605,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (sum == 0 || sum == 6*0xff) { #if defined(CONFIG_SPARC) struct device_node *dp = pci_device_to_OF_node(pdev); - const unsigned char *addr; + const unsigned char *addr2; int len; #endif eeprom_missing = 1; @@ -1614,9 +1614,9 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) addr[i] = last_phys_addr[i] + 1; eth_hw_addr_set(dev, addr); #if defined(CONFIG_SPARC) - addr = of_get_property(dp, "local-mac-address", &len); - if (addr && len == ETH_ALEN) - eth_hw_addr_set(dev, addr); + addr2 = of_get_property(dp, "local-mac-address", &len); + if (addr2 && len == ETH_ALEN) + eth_hw_addr_set(dev, addr2); #endif #if defined(__i386__) || defined(__x86_64__) /* Patch up x86 BIOS bug. */ if (last_irq) From 21314638c9f251bb40c8ad3b4e1c232057789b8b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:21 +0300 Subject: [PATCH 119/440] devlink: Reduce struct devlink exposure The declaration of struct devlink in general header provokes the situation where internal fields can be accidentally used by the driver authors. In order to reduce such possible situations, let's reduce the namespace exposure of struct devlink. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxfw/mlxfw.h | 2 +- include/net/devlink.h | 54 ++-------------- include/trace/events/devlink.h | 72 ++++++++++----------- net/core/devlink.c | 57 ++++++++++++++++ 4 files changed, 98 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 7654841a05c2..e6475ea77cd1 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -19,7 +19,7 @@ struct mlxfw_dev { static inline struct device *mlxfw_dev_dev(struct mlxfw_dev *mlxfw_dev) { - return mlxfw_dev->devlink->dev; + return devlink_to_dev(mlxfw_dev->devlink); } #define MLXFW_PRFX "mlxfw: " diff --git a/include/net/devlink.h b/include/net/devlink.h index a7852a257bf6..ae03eb1c6cc9 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -21,45 +21,7 @@ #include #include -#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ - (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) - -struct devlink_dev_stats { - u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; - u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; -}; - -struct devlink_ops; - -struct devlink { - u32 index; - struct list_head port_list; - struct list_head rate_list; - struct list_head sb_list; - struct list_head dpipe_table_list; - struct list_head resource_list; - struct list_head param_list; - struct list_head region_list; - struct list_head reporter_list; - struct mutex reporters_lock; /* protects reporter_list */ - struct devlink_dpipe_headers *dpipe_headers; - struct list_head trap_list; - struct list_head trap_group_list; - struct list_head trap_policer_list; - const struct devlink_ops *ops; - struct xarray snapshot_ids; - struct devlink_dev_stats stats; - struct device *dev; - possible_net_t _net; - struct mutex lock; /* Serializes access to devlink instance specific objects such as - * port, sb, dpipe, resource, params, region, traps and more. - */ - u8 reload_failed:1, - reload_enabled:1; - refcount_t refcount; - struct completion comp; - char priv[0] __aligned(NETDEV_ALIGN); -}; +struct devlink; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". @@ -1520,17 +1482,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); }; -static inline void *devlink_priv(struct devlink *devlink) -{ - BUG_ON(!devlink); - return &devlink->priv; -} - -static inline struct devlink *priv_to_devlink(void *priv) -{ - BUG_ON(!priv); - return container_of(priv, struct devlink, priv); -} +void *devlink_priv(struct devlink *devlink); +struct devlink *priv_to_devlink(void *priv); +struct device *devlink_to_dev(const struct devlink *devlink); static inline struct devlink_port * netdev_to_devlink_port(struct net_device *dev) diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 44d8e2981065..2814f188d98c 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -21,9 +21,9 @@ TRACE_EVENT(devlink_hwmsg, TP_ARGS(devlink, incoming, type, buf, len), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -31,9 +31,9 @@ TRACE_EVENT(devlink_hwmsg, ), TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __entry->incoming = incoming; __entry->type = type; memcpy(__get_dynamic_array(buf), buf, len); @@ -55,17 +55,17 @@ TRACE_EVENT(devlink_hwerr, TP_ARGS(devlink, err, msg), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(int, err) __string(msg, msg) ), TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __entry->err = err; __assign_str(msg, msg); ), @@ -85,17 +85,17 @@ TRACE_EVENT(devlink_health_report, TP_ARGS(devlink, reporter_name, msg), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, msg) __string(msg, msg) ), TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(reporter_name, reporter_name); __assign_str(msg, msg); ), @@ -116,18 +116,18 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(bool, health_state) __field(u64, time_since_last_recover) ), TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(reporter_name, reporter_name); __entry->health_state = health_state; __entry->time_since_last_recover = time_since_last_recover; @@ -150,17 +150,17 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_ARGS(devlink, reporter_name, new_state), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(u8, new_state) ), TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(reporter_name, reporter_name); __entry->new_state = new_state; ), @@ -181,9 +181,9 @@ TRACE_EVENT(devlink_trap_report, TP_ARGS(devlink, skb, metadata), TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) + __string(bus_name, devlink_to_dev(devlink)->bus->name) + __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) __dynamic_array(char, input_dev_name, IFNAMSIZ) @@ -192,9 +192,9 @@ TRACE_EVENT(devlink_trap_report, TP_fast_assign( struct net_device *input_dev = metadata->input_dev; - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(bus_name, devlink_to_dev(devlink)->bus->name); + __assign_str(dev_name, dev_name(devlink_to_dev(devlink))); + __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(trap_name, metadata->trap_name); __assign_str(trap_group_name, metadata->trap_group_name); __assign_str(input_dev_name, diff --git a/net/core/devlink.c b/net/core/devlink.c index 4917112406a0..87d1766ebdbf 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -30,6 +30,63 @@ #define CREATE_TRACE_POINTS #include +#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ + (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) + +struct devlink_dev_stats { + u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; + u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; +}; + +struct devlink { + u32 index; + struct list_head port_list; + struct list_head rate_list; + struct list_head sb_list; + struct list_head dpipe_table_list; + struct list_head resource_list; + struct list_head param_list; + struct list_head region_list; + struct list_head reporter_list; + struct mutex reporters_lock; /* protects reporter_list */ + struct devlink_dpipe_headers *dpipe_headers; + struct list_head trap_list; + struct list_head trap_group_list; + struct list_head trap_policer_list; + const struct devlink_ops *ops; + struct xarray snapshot_ids; + struct devlink_dev_stats stats; + struct device *dev; + possible_net_t _net; + /* Serializes access to devlink instance specific objects such as + * port, sb, dpipe, resource, params, region, traps and more. + */ + struct mutex lock; + u8 reload_failed:1, + reload_enabled:1; + refcount_t refcount; + struct completion comp; + char priv[0] __aligned(NETDEV_ALIGN); +}; + +void *devlink_priv(struct devlink *devlink) +{ + return &devlink->priv; +} +EXPORT_SYMBOL_GPL(devlink_priv); + +struct devlink *priv_to_devlink(void *priv) +{ + return container_of(priv, struct devlink, priv); +} +EXPORT_SYMBOL_GPL(priv_to_devlink); + +struct device *devlink_to_dev(const struct devlink *devlink) +{ + return devlink->dev; +} +EXPORT_SYMBOL_GPL(devlink_to_dev); + static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { { .name = "destination mac", From 2bc50987dc1f467551f2ca537239016915256f6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:22 +0300 Subject: [PATCH 120/440] devlink: Move netdev_to_devlink helpers to devlink.c Both netdev_to_devlink and netdev_to_devlink_port are used in devlink.c only, so move them in order to reduce their scope. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 17 ----------------- net/core/devlink.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index ae03eb1c6cc9..7f44ad14e5ed 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1486,23 +1486,6 @@ void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); -static inline struct devlink_port * -netdev_to_devlink_port(struct net_device *dev) -{ - if (dev->netdev_ops->ndo_get_devlink_port) - return dev->netdev_ops->ndo_get_devlink_port(dev); - return NULL; -} - -static inline struct devlink *netdev_to_devlink(struct net_device *dev) -{ - struct devlink_port *devlink_port = netdev_to_devlink_port(dev); - - if (devlink_port) - return devlink_port->devlink; - return NULL; -} - struct ib_device; struct net *devlink_net(const struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index 87d1766ebdbf..e9e908c4cfb4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -11381,6 +11381,24 @@ free_msg: nlmsg_free(msg); } +static struct devlink_port *netdev_to_devlink_port(struct net_device *dev) +{ + if (!dev->netdev_ops->ndo_get_devlink_port) + return NULL; + + return dev->netdev_ops->ndo_get_devlink_port(dev); +} + +static struct devlink *netdev_to_devlink(struct net_device *dev) +{ + struct devlink_port *devlink_port = netdev_to_devlink_port(dev); + + if (!devlink_port) + return NULL; + + return devlink_port->devlink; +} + void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { From b88f7b1203bf481af58291c28957aaab09503f43 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:23 +0300 Subject: [PATCH 121/440] devlink: Annotate devlink API calls Initial annotation patch to separate calls that needs to be executed before or after devlink_register(). Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index e9e908c4cfb4..276e1e421eb4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -152,6 +152,22 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 +/* devlink instances are open to the access from the user space after + * devlink_register() call. Such logical barrier allows us to have certain + * expectations related to locking. + * + * Before *_register() - we are in initialization stage and no parallel + * access possible to the devlink instance. All drivers perform that phase + * by implicitly holding device_lock. + * + * After *_register() - users and driver can access devlink instance at + * the same time. + */ +#define ASSERT_DEVLINK_REGISTERED(d) \ + WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) +#define ASSERT_DEVLINK_NOT_REGISTERED(d) \ + WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) + /* devlink_mutex * * An overall lock guarding every operation coming from userspace. @@ -9113,6 +9129,10 @@ static void devlink_notify_unregister(struct devlink *devlink) */ void devlink_register(struct devlink *devlink) { + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + /* Make sure that we are in .probe() routine */ + device_lock_assert(devlink->dev); + mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); @@ -9127,6 +9147,10 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + ASSERT_DEVLINK_REGISTERED(devlink); + /* Make sure that we are in .remove() routine */ + device_lock_assert(devlink->dev); + devlink_put(devlink); wait_for_completion(&devlink->comp); @@ -9181,6 +9205,8 @@ EXPORT_SYMBOL_GPL(devlink_reload_disable); */ void devlink_free(struct devlink *devlink) { + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); From bd032e35c568ff7cfa347aa4ddd6491e2453f00c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:24 +0300 Subject: [PATCH 122/440] devlink: Allow control devlink ops behavior through feature mask Introduce new devlink call to set feature mask to control devlink behavior during device initialization phase after devlink_alloc() is already called. This allows us to set reload ops based on device property which is not known at the beginning of driver initialization. For the sake of simplicity, this API lacks any type of locking and needs to be called before devlink_register() to make sure that no parallel access to the ops is possible at this stage. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3pf/hclge_devlink.c | 1 + .../hisilicon/hns3/hns3vf/hclgevf_devlink.c | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 1 + .../net/ethernet/mellanox/mlx5/core/devlink.c | 1 + drivers/net/ethernet/mellanox/mlxsw/core.c | 1 + drivers/net/netdevsim/dev.c | 1 + include/net/devlink.h | 6 +++++ net/core/devlink.c | 24 +++++++++++++++++-- 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 59b0ae7d59e0..438fe62fea4d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -119,6 +119,7 @@ int hclge_devlink_init(struct hclge_dev *hdev) priv->hdev = hdev; hdev->devlink = devlink; + devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); devlink_reload_enable(devlink); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index d60cc9426f70..519f4108422e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -121,6 +121,7 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) priv->hdev = hdev; hdev->devlink = devlink; + devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); devlink_reload_enable(devlink); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 9541f3a920c8..05e850a40b36 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4025,6 +4025,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto err_params_unregister; pci_save_state(pdev); + devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); devlink_reload_enable(devlink); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index b9a6cea03951..fa98b7b95990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -813,6 +813,7 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto traps_reg_err; + devlink_set_features(devlink, DEVLINK_F_RELOAD); return 0; traps_reg_err: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 9e831e8b607a..0f1567149a54 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -2008,6 +2008,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } if (!reload) { + devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); devlink_reload_enable(devlink); } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index cb6645012a30..520c019dbad5 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1511,6 +1511,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) goto err_psample_exit; nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; + devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); devlink_reload_enable(devlink); return 0; diff --git a/include/net/devlink.h b/include/net/devlink.h index 7f44ad14e5ed..287f18b88293 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1186,6 +1186,11 @@ enum devlink_trap_group_generic_id { .min_burst = _min_burst, \ } +enum { + /* device supports reload operations */ + DEVLINK_F_RELOAD = 1UL << 0, +}; + struct devlink_ops { /** * @supported_flash_update_params: @@ -1503,6 +1508,7 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, { return devlink_alloc_ns(ops, priv_size, &init_net, dev); } +void devlink_set_features(struct devlink *devlink, u64 features); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index 276e1e421eb4..1931caa0ce1e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -54,6 +54,7 @@ struct devlink { struct list_head trap_group_list; struct list_head trap_policer_list; const struct devlink_ops *ops; + u64 features; struct xarray snapshot_ids; struct devlink_dev_stats stats; struct device *dev; @@ -4032,7 +4033,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; - if (!devlink->reload_enabled) + if (!devlink->reload_enabled || !(devlink->features & DEVLINK_F_RELOAD)) return -EOPNOTSUPP; memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, @@ -8985,6 +8986,25 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) return true; } +/** + * devlink_set_features - Set devlink supported features + * + * @devlink: devlink + * @features: devlink support features + * + * This interface allows us to set reload ops separatelly from + * the devlink_alloc. + */ +void devlink_set_features(struct devlink *devlink, u64 features) +{ + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + WARN_ON(features & DEVLINK_F_RELOAD && + !devlink_reload_supported(devlink->ops)); + devlink->features = features; +} +EXPORT_SYMBOL_GPL(devlink_set_features); + /** * devlink_alloc_ns - Allocate new devlink instance resources * in specific namespace @@ -9155,7 +9175,7 @@ void devlink_unregister(struct devlink *devlink) wait_for_completion(&devlink->comp); mutex_lock(&devlink_mutex); - WARN_ON(devlink_reload_supported(devlink->ops) && + WARN_ON(devlink->features & DEVLINK_F_RELOAD && devlink->reload_enabled); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); From 96869f193cfd26c2b47db32e4d8bcad50461df7a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:25 +0300 Subject: [PATCH 123/440] net/mlx5: Set devlink reload feature bit for supported devices only Mulitport slave device doesn't support devlink reload, so instead of complicating initialization flow with devlink_reload_enable() which will be removed in next patch, don't set DEVLINK_F_RELOAD feature bit for such devices. This fixes an error when reload counters exposed (and equal zero) for the mode that is not supported at all. Fixes: d89ddaae1766 ("net/mlx5: Disable devlink reload for multi port slave device") Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index fa98b7b95990..a85341a41cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -796,6 +796,7 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink) int mlx5_devlink_register(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); int err; err = devlink_params_register(devlink, mlx5_devlink_params, @@ -813,7 +814,9 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto traps_reg_err; - devlink_set_features(devlink, DEVLINK_F_RELOAD); + if (!mlx5_core_is_mp_slave(dev)) + devlink_set_features(devlink, DEVLINK_F_RELOAD); + return 0; traps_reg_err: From 82465bec3e9713c395b0a02f5aaf7949443cb877 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 16:15:26 +0300 Subject: [PATCH 124/440] devlink: Delete reload enable/disable interface Commit a0c76345e3d3 ("devlink: disallow reload operation during device cleanup") added devlink_reload_{enable,disable}() APIs to prevent reload operation from racing with device probe/dismantle. After recent changes to move devlink_register() to the end of device probe and devlink_unregister() to the beginning of device dismantle, these races can no longer happen. Reload operations will be denied if the devlink instance is unregistered and devlink_unregister() will block until all in-flight operations are done. Therefore, remove these devlink_reload_{enable,disable}() APIs. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3pf/hclge_devlink.c | 3 -- .../hisilicon/hns3/hns3vf/hclgevf_devlink.c | 3 -- drivers/net/ethernet/mellanox/mlx4/main.c | 2 - .../net/ethernet/mellanox/mlx5/core/main.c | 3 -- .../mellanox/mlx5/core/sf/dev/driver.c | 5 +- drivers/net/ethernet/mellanox/mlxsw/core.c | 7 +-- drivers/net/netdevsim/dev.c | 3 -- include/net/devlink.h | 2 - net/core/devlink.c | 47 ++----------------- 9 files changed, 6 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 438fe62fea4d..4c441e6a5082 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -121,7 +121,6 @@ int hclge_devlink_init(struct hclge_dev *hdev) devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); - devlink_reload_enable(devlink); return 0; } @@ -129,8 +128,6 @@ void hclge_devlink_uninit(struct hclge_dev *hdev) { struct devlink *devlink = hdev->devlink; - devlink_reload_disable(devlink); - devlink_unregister(devlink); devlink_free(devlink); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index 519f4108422e..fdc19868b818 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -123,7 +123,6 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); - devlink_reload_enable(devlink); return 0; } @@ -131,8 +130,6 @@ void hclgevf_devlink_uninit(struct hclgevf_dev *hdev) { struct devlink *devlink = hdev->devlink; - devlink_reload_disable(devlink); - devlink_unregister(devlink); devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 05e850a40b36..b187c210d4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4027,7 +4027,6 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_save_state(pdev); devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); - devlink_reload_enable(devlink); return 0; err_params_unregister: @@ -4136,7 +4135,6 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; - devlink_reload_disable(devlink); devlink_unregister(devlink); if (mlx4_is_slave(dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5893fdd5aedb..65313448a47c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1538,8 +1538,6 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_save_state(pdev); devlink_register(devlink); - if (!mlx5_core_is_mp_slave(dev)) - devlink_reload_enable(devlink); return 0; err_init_one: @@ -1559,7 +1557,6 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); - devlink_reload_disable(devlink); devlink_unregister(devlink); mlx5_crdump_disable(dev); mlx5_drain_health_wq(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 3cf272fa2164..7b4783ce213e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -47,7 +47,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto init_one_err; } devlink_register(devlink); - devlink_reload_enable(devlink); return 0; init_one_err: @@ -62,10 +61,8 @@ mdev_err: static void mlx5_sf_dev_remove(struct auxiliary_device *adev) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); - struct devlink *devlink; + struct devlink *devlink = priv_to_devlink(sf_dev->mdev); - devlink = priv_to_devlink(sf_dev->mdev); - devlink_reload_disable(devlink); devlink_unregister(devlink); mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 0f1567149a54..3fd3812b8f31 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -2010,9 +2010,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!reload) { devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); - devlink_reload_enable(devlink); } - return 0; err_driver_init: @@ -2076,10 +2074,9 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); - if (!reload) { - devlink_reload_disable(devlink); + if (!reload) devlink_unregister(devlink); - } + if (devlink_is_reload_failed(devlink)) { if (!reload) /* Only the parts that were not de-initialized in the diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 520c019dbad5..9661aca35703 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1513,7 +1513,6 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); - devlink_reload_enable(devlink); return 0; err_psample_exit: @@ -1567,9 +1566,7 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); struct devlink *devlink = priv_to_devlink(nsim_dev); - devlink_reload_disable(devlink); devlink_unregister(devlink); - nsim_dev_reload_destroy(nsim_dev); nsim_bpf_dev_exit(nsim_dev); diff --git a/include/net/devlink.h b/include/net/devlink.h index 287f18b88293..da3ceeb8b87b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1511,8 +1511,6 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, void devlink_set_features(struct devlink *devlink, u64 features); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); -void devlink_reload_enable(struct devlink *devlink); -void devlink_reload_disable(struct devlink *devlink); void devlink_free(struct devlink *devlink); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index 1931caa0ce1e..3ce6147a2fe8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -63,8 +63,7 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex lock; - u8 reload_failed:1, - reload_enabled:1; + u8 reload_failed:1; refcount_t refcount; struct completion comp; char priv[0] __aligned(NETDEV_ALIGN); @@ -4033,9 +4032,6 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; - if (!devlink->reload_enabled || !(devlink->features & DEVLINK_F_RELOAD)) - return -EOPNOTSUPP; - memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -4103,7 +4099,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) u32 actions_performed; int err; - if (!devlink_reload_supported(devlink->ops)) + if (!(devlink->features & DEVLINK_F_RELOAD)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -9175,49 +9171,12 @@ void devlink_unregister(struct devlink *devlink) wait_for_completion(&devlink->comp); mutex_lock(&devlink_mutex); - WARN_ON(devlink->features & DEVLINK_F_RELOAD && - devlink->reload_enabled); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); -/** - * devlink_reload_enable - Enable reload of devlink instance - * - * @devlink: devlink - * - * Should be called at end of device initialization - * process when reload operation is supported. - */ -void devlink_reload_enable(struct devlink *devlink) -{ - mutex_lock(&devlink_mutex); - devlink->reload_enabled = true; - mutex_unlock(&devlink_mutex); -} -EXPORT_SYMBOL_GPL(devlink_reload_enable); - -/** - * devlink_reload_disable - Disable reload of devlink instance - * - * @devlink: devlink - * - * Should be called at the beginning of device cleanup - * process when reload operation is supported. - */ -void devlink_reload_disable(struct devlink *devlink) -{ - mutex_lock(&devlink_mutex); - /* Mutex is taken which ensures that no reload operation is in - * progress while setting up forbidded flag. - */ - devlink->reload_enabled = false; - mutex_unlock(&devlink_mutex); -} -EXPORT_SYMBOL_GPL(devlink_reload_disable); - /** * devlink_free - Free devlink instance resources * @@ -11554,7 +11513,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) if (!net_eq(devlink_net(devlink), net)) goto retry; - WARN_ON(!devlink_reload_supported(devlink->ops)); + WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, From d1f24712a86abd04d82cf4b00fb4ab8ff2d23c8a Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 12 Oct 2021 16:15:20 -0700 Subject: [PATCH 125/440] ionic: no devlink_unregister if not registered Don't try to unregister the devlink if it hasn't been registered yet. This bit of error cleanup code got missed in the recent devlink registration changes. Fixes: 7911c8bd546f ("ionic: Move devlink registration to be last devlink command") Signed-off-by: Shannon Nelson Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20211012231520.72582-1-snelson@pensando.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_devlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index 2267da95640b..4297ed9024c0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -87,7 +87,6 @@ int ionic_devlink_register(struct ionic *ionic) err = devlink_port_register(dl, &ionic->dl_port, 0); if (err) { dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); - devlink_unregister(dl); return err; } From edce2a93dd781ac13bd7f651ee0b5edb8af6c0e4 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Oct 2021 15:13:58 +0300 Subject: [PATCH 126/440] net: enetc: include ip6_checksum.h for csum_ipv6_magic For those architectures which do not define_HAVE_ARCH_IPV6_CSUM, we need to include ip6_checksum.h which provides the csum_ipv6_magic() function. Fixes: fb8629e2cbfc ("net: enetc: add support for software TSO") Reported-by: kernel test robot Signed-off-by: Ioana Ciornei Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211012121358.16641-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 09193b478ab3..3ae4f49a9055 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include From 50515cac8d0ea2026db2f0cb456bfa51d5e8e85f Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Tue, 12 Oct 2021 01:46:45 -0600 Subject: [PATCH 127/440] net: qed_debug: fix check of false (grc_param < 0) expression The type of enum dbg_grc_params has the enumerator list starting from 0. When grc_param is declared by enum dbg_grc_params, (grc_param < 0) is always false. We should remove the check of this expression. Signed-off-by: Jean Sacren Acked-by: Shai Malin Link: https://lore.kernel.org/r/20211012074645.12864-1-sakiwit@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index f6198b9a1b02..e3edca187ddf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -5256,7 +5256,7 @@ enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn, */ qed_dbg_grc_init_params(p_hwfn); - if (grc_param >= MAX_DBG_GRC_PARAMS || grc_param < 0) + if (grc_param >= MAX_DBG_GRC_PARAMS) return DBG_STATUS_INVALID_ARGS; if (val < s_grc_param_defs[grc_param].min || val > s_grc_param_defs[grc_param].max) From 23144a915684b645529b0a51cbbfd5eae727ced7 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:00 +0100 Subject: [PATCH 128/440] ravb: Use ALIGN macro for max_rx_len Use ALIGN macro for calculating the value for max_rx_len. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 50038e76c72f..0e3cda1140e5 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2227,7 +2227,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .set_feature = ravb_set_features_gbeth, .dmac_init = ravb_dmac_init_gbeth, .emac_init = ravb_emac_init_gbeth, - .max_rx_len = GBETH_RX_BUFF_MAX + RAVB_ALIGN - 1, + .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN), .tsrq = TCCR_TSRQ0, .aligned_tx = 1, .tx_counters = 1, From 2e95e08ac009273184c259848851d495258448c2 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:01 +0100 Subject: [PATCH 129/440] ravb: Add rx_max_buf_size to struct ravb_hw_info R-Car AVB-DMAC has maximum 2K size on RX buffer, whereas on RZ/G2L it is 8K. We need to allow for changing the MTU within the limit of the maximum size of a descriptor. Add a rx_max_buf_size variable to struct ravb_hw_info to handle this difference. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 1 + drivers/net/ethernet/renesas/ravb_main.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 5dc1324786e0..b147c4a0dc0b 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1010,6 +1010,7 @@ struct ravb_hw_info { int stats_len; size_t max_rx_len; u32 tsrq; + u32 rx_max_buf_size; unsigned aligned_tx: 1; /* hardware features */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0e3cda1140e5..fe3449fb4135 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2188,6 +2188,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, .tsrq = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .rx_max_buf_size = SZ_2K, .internal_delay = 1, .tx_counters = 1, .multi_irqs = 1, @@ -2212,6 +2213,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, .tsrq = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .rx_max_buf_size = SZ_2K, .aligned_tx = 1, .gptp = 1, .nc_queue = 1, @@ -2229,6 +2231,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .emac_init = ravb_emac_init_gbeth, .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN), .tsrq = TCCR_TSRQ0, + .rx_max_buf_size = SZ_8K, .aligned_tx = 1, .tx_counters = 1, .half_duplex = 1, @@ -2452,7 +2455,7 @@ static int ravb_probe(struct platform_device *pdev) } clk_prepare_enable(priv->refclk); - ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + ndev->max_mtu = info->rx_max_buf_size - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; /* FIXME: R-Car Gen2 has 4byte alignment restriction for tx buffer From 3d4e37df882b0f4f28b7223a42492650b71252c5 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:02 +0100 Subject: [PATCH 130/440] ravb: Fillup ravb_alloc_rx_desc_gbeth() stub Fillup ravb_alloc_rx_desc_gbeth() function to support RZ/G2L. This patch also renames ravb_alloc_rx_desc to ravb_alloc_rx_desc_rcar to be consistent with the naming convention used in sh_eth driver. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 1 + drivers/net/ethernet/renesas/ravb_main.c | 30 ++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index b147c4a0dc0b..ed771ead61b9 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1038,6 +1038,7 @@ struct ravb_private { struct ravb_desc *desc_bat; dma_addr_t rx_desc_dma[NUM_RX_QUEUE]; dma_addr_t tx_desc_dma[NUM_TX_QUEUE]; + struct ravb_rx_desc *gbeth_rx_ring; struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE]; struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE]; void *tx_align[NUM_TX_QUEUE]; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fe3449fb4135..c6bab0dfd3bc 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -385,11 +385,18 @@ static void ravb_ring_format(struct net_device *ndev, int q) static void *ravb_alloc_rx_desc_gbeth(struct net_device *ndev, int q) { - /* Place holder */ - return NULL; + struct ravb_private *priv = netdev_priv(ndev); + unsigned int ring_size; + + ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); + + priv->gbeth_rx_ring = dma_alloc_coherent(ndev->dev.parent, ring_size, + &priv->rx_desc_dma[q], + GFP_KERNEL); + return priv->gbeth_rx_ring; } -static void *ravb_alloc_rx_desc(struct net_device *ndev, int q) +static void *ravb_alloc_rx_desc_rcar(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); unsigned int ring_size; @@ -1084,16 +1091,25 @@ static int ravb_poll(struct napi_struct *napi, int budget) struct net_device *ndev = napi->dev; struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; + bool gptp = info->gptp || info->ccc_gac; + struct ravb_rx_desc *desc; unsigned long flags; int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + unsigned int entry; + if (!gptp) { + entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + desc = &priv->gbeth_rx_ring[entry]; + } /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, "a, q)) - goto out; + if (gptp || desc->die_dt != DT_FEMPTY) { + if (ravb_rx(ndev, "a, q)) + goto out; + } /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -2175,7 +2191,7 @@ static int ravb_mdio_release(struct ravb_private *priv) static const struct ravb_hw_info ravb_gen3_hw_info = { .rx_ring_free = ravb_rx_ring_free, .rx_ring_format = ravb_rx_ring_format, - .alloc_rx_desc = ravb_alloc_rx_desc, + .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, @@ -2200,7 +2216,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { static const struct ravb_hw_info ravb_gen2_hw_info = { .rx_ring_free = ravb_rx_ring_free, .rx_ring_format = ravb_rx_ring_format, - .alloc_rx_desc = ravb_alloc_rx_desc, + .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, From 2458b8edb887108b8a6aae567b01e5e2a73bd8d8 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:03 +0100 Subject: [PATCH 131/440] ravb: Fillup ravb_rx_ring_free_gbeth() stub Fillup ravb_rx_ring_free_gbeth() function to support RZ/G2L. This patch also renames ravb_rx_ring_free to ravb_rx_ring_free_rcar to be consistent with the naming convention used in sh_eth driver. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 28 ++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c6bab0dfd3bc..1ea25a8d208e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -236,10 +236,30 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) static void ravb_rx_ring_free_gbeth(struct net_device *ndev, int q) { - /* Place holder */ + struct ravb_private *priv = netdev_priv(ndev); + unsigned int ring_size; + unsigned int i; + + if (!priv->gbeth_rx_ring) + return; + + for (i = 0; i < priv->num_rx_ring[q]; i++) { + struct ravb_rx_desc *desc = &priv->gbeth_rx_ring[i]; + + if (!dma_mapping_error(ndev->dev.parent, + le32_to_cpu(desc->dptr))) + dma_unmap_single(ndev->dev.parent, + le32_to_cpu(desc->dptr), + GBETH_RX_BUFF_MAX, + DMA_FROM_DEVICE); + } + ring_size = sizeof(struct ravb_rx_desc) * (priv->num_rx_ring[q] + 1); + dma_free_coherent(ndev->dev.parent, ring_size, priv->gbeth_rx_ring, + priv->rx_desc_dma[q]); + priv->gbeth_rx_ring = NULL; } -static void ravb_rx_ring_free(struct net_device *ndev, int q) +static void ravb_rx_ring_free_rcar(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); unsigned int ring_size; @@ -2189,7 +2209,7 @@ static int ravb_mdio_release(struct ravb_private *priv) } static const struct ravb_hw_info ravb_gen3_hw_info = { - .rx_ring_free = ravb_rx_ring_free, + .rx_ring_free = ravb_rx_ring_free_rcar, .rx_ring_format = ravb_rx_ring_format, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, @@ -2214,7 +2234,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { }; static const struct ravb_hw_info ravb_gen2_hw_info = { - .rx_ring_free = ravb_rx_ring_free, + .rx_ring_free = ravb_rx_ring_free_rcar, .rx_ring_format = ravb_rx_ring_format, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, From 16a6e245a9f3cd661f3824940bb62b0ab4503d53 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:04 +0100 Subject: [PATCH 132/440] ravb: Fillup ravb_rx_ring_format_gbeth() stub Fillup ravb_rx_ring_format_gbeth() function to support RZ/G2L. This patch also renames ravb_rx_ring_format to ravb_rx_ring_format_rcar to be consistent with the naming convention used in sh_eth driver. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 1 + drivers/net/ethernet/renesas/ravb_main.c | 34 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index ed771ead61b9..e9de3f8306ce 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -970,6 +970,7 @@ enum CXR31_BIT { #define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16)) #define GBETH_RX_BUFF_MAX 8192 +#define GBETH_RX_DESC_DATA_SIZE 4080 struct ravb_tstamp_skb { struct list_head list; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1ea25a8d208e..f7ce9b6e596f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -327,10 +327,36 @@ static void ravb_ring_free(struct net_device *ndev, int q) static void ravb_rx_ring_format_gbeth(struct net_device *ndev, int q) { - /* Place holder */ + struct ravb_private *priv = netdev_priv(ndev); + struct ravb_rx_desc *rx_desc; + unsigned int rx_ring_size; + dma_addr_t dma_addr; + unsigned int i; + + rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q]; + memset(priv->gbeth_rx_ring, 0, rx_ring_size); + /* Build RX ring buffer */ + for (i = 0; i < priv->num_rx_ring[q]; i++) { + /* RX descriptor */ + rx_desc = &priv->gbeth_rx_ring[i]; + rx_desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, + GBETH_RX_BUFF_MAX, + DMA_FROM_DEVICE); + /* We just set the data size to 0 for a failed mapping which + * should prevent DMA from happening... + */ + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + rx_desc->ds_cc = cpu_to_le16(0); + rx_desc->dptr = cpu_to_le32(dma_addr); + rx_desc->die_dt = DT_FEMPTY; + } + rx_desc = &priv->gbeth_rx_ring[i]; + rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]); + rx_desc->die_dt = DT_LINKFIX; /* type */ } -static void ravb_rx_ring_format(struct net_device *ndev, int q) +static void ravb_rx_ring_format_rcar(struct net_device *ndev, int q) { struct ravb_private *priv = netdev_priv(ndev); struct ravb_ex_rx_desc *rx_desc; @@ -2210,7 +2236,7 @@ static int ravb_mdio_release(struct ravb_private *priv) static const struct ravb_hw_info ravb_gen3_hw_info = { .rx_ring_free = ravb_rx_ring_free_rcar, - .rx_ring_format = ravb_rx_ring_format, + .rx_ring_format = ravb_rx_ring_format_rcar, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, .set_rate = ravb_set_rate_rcar, @@ -2235,7 +2261,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { static const struct ravb_hw_info ravb_gen2_hw_info = { .rx_ring_free = ravb_rx_ring_free_rcar, - .rx_ring_format = ravb_rx_ring_format, + .rx_ring_format = ravb_rx_ring_format_rcar, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, .receive = ravb_rcar_rx, .set_rate = ravb_set_rate_rcar, From 1c59eb678cbd8d322d06d3a5514d36e8e1a4e84c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:05 +0100 Subject: [PATCH 133/440] ravb: Fillup ravb_rx_gbeth() stub Fillup ravb_rx_gbeth() function to support RZ/G2L. This patch also renames ravb_rcar_rx to ravb_rx_rcar to be consistent with the naming convention used in sh_eth driver. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 1 + drivers/net/ethernet/renesas/ravb_main.c | 146 ++++++++++++++++++++++- 2 files changed, 142 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index e9de3f8306ce..99164318db26 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1043,6 +1043,7 @@ struct ravb_private { struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE]; struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE]; void *tx_align[NUM_TX_QUEUE]; + struct sk_buff *rx_1st_skb; struct sk_buff **rx_skb[NUM_RX_QUEUE]; struct sk_buff **tx_skb[NUM_TX_QUEUE]; u32 rx_over_errors; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f7ce9b6e596f..1a5ebc58682d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -734,15 +734,151 @@ static void ravb_rx_csum(struct sk_buff *skb) skb_trim(skb, skb->len - sizeof(__sum16)); } +static struct sk_buff *ravb_get_skb_gbeth(struct net_device *ndev, int entry, + struct ravb_rx_desc *desc) +{ + struct ravb_private *priv = netdev_priv(ndev); + struct sk_buff *skb; + + skb = priv->rx_skb[RAVB_BE][entry]; + priv->rx_skb[RAVB_BE][entry] = NULL; + dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), + ALIGN(GBETH_RX_BUFF_MAX, 16), DMA_FROM_DEVICE); + + return skb; +} + /* Packet receive function for Gigabit Ethernet */ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) { - /* Place holder */ - return true; + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; + struct net_device_stats *stats; + struct ravb_rx_desc *desc; + struct sk_buff *skb; + dma_addr_t dma_addr; + u8 desc_status; + int boguscnt; + u16 pkt_len; + u8 die_dt; + int entry; + int limit; + + entry = priv->cur_rx[q] % priv->num_rx_ring[q]; + boguscnt = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + stats = &priv->stats[q]; + + boguscnt = min(boguscnt, *quota); + limit = boguscnt; + desc = &priv->gbeth_rx_ring[entry]; + while (desc->die_dt != DT_FEMPTY) { + /* Descriptor type must be checked before all other reads */ + dma_rmb(); + desc_status = desc->msc; + pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS; + + if (--boguscnt < 0) + break; + + /* We use 0-byte descriptors to mark the DMA mapping errors */ + if (!pkt_len) + continue; + + if (desc_status & MSC_MC) + stats->multicast++; + + if (desc_status & (MSC_CRC | MSC_RFE | MSC_RTSF | MSC_RTLF | MSC_CEEF)) { + stats->rx_errors++; + if (desc_status & MSC_CRC) + stats->rx_crc_errors++; + if (desc_status & MSC_RFE) + stats->rx_frame_errors++; + if (desc_status & (MSC_RTLF | MSC_RTSF)) + stats->rx_length_errors++; + if (desc_status & MSC_CEEF) + stats->rx_missed_errors++; + } else { + die_dt = desc->die_dt & 0xF0; + switch (die_dt) { + case DT_FSINGLE: + skb = ravb_get_skb_gbeth(ndev, entry, desc); + skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + napi_gro_receive(&priv->napi[q], skb); + stats->rx_packets++; + stats->rx_bytes += pkt_len; + break; + case DT_FSTART: + priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc); + skb_put(priv->rx_1st_skb, pkt_len); + break; + case DT_FMID: + skb = ravb_get_skb_gbeth(ndev, entry, desc); + skb_copy_to_linear_data_offset(priv->rx_1st_skb, + priv->rx_1st_skb->len, + skb->data, + pkt_len); + skb_put(priv->rx_1st_skb, pkt_len); + dev_kfree_skb(skb); + break; + case DT_FEND: + skb = ravb_get_skb_gbeth(ndev, entry, desc); + skb_copy_to_linear_data_offset(priv->rx_1st_skb, + priv->rx_1st_skb->len, + skb->data, + pkt_len); + skb_put(priv->rx_1st_skb, pkt_len); + dev_kfree_skb(skb); + priv->rx_1st_skb->protocol = + eth_type_trans(priv->rx_1st_skb, ndev); + napi_gro_receive(&priv->napi[q], + priv->rx_1st_skb); + stats->rx_packets++; + stats->rx_bytes += priv->rx_1st_skb->len; + break; + } + } + + entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q]; + desc = &priv->gbeth_rx_ring[entry]; + } + + /* Refill the RX ring buffers. */ + for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { + entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; + desc = &priv->gbeth_rx_ring[entry]; + desc->ds_cc = cpu_to_le16(GBETH_RX_DESC_DATA_SIZE); + + if (!priv->rx_skb[q][entry]) { + skb = netdev_alloc_skb(ndev, info->max_rx_len); + if (!skb) + break; + ravb_set_buffer_align(skb); + dma_addr = dma_map_single(ndev->dev.parent, + skb->data, + GBETH_RX_BUFF_MAX, + DMA_FROM_DEVICE); + skb_checksum_none_assert(skb); + /* We just set the data size to 0 for a failed mapping + * which should prevent DMA from happening... + */ + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + desc->ds_cc = cpu_to_le16(0); + desc->dptr = cpu_to_le32(dma_addr); + priv->rx_skb[q][entry] = skb; + } + /* Descriptor type must be set after all the above writes */ + dma_wmb(); + desc->die_dt = DT_FEMPTY; + } + + *quota -= limit - (++boguscnt); + + return boguscnt <= 0; } /* Packet receive function for Ethernet AVB */ -static bool ravb_rcar_rx(struct net_device *ndev, int *quota, int q) +static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; @@ -2238,7 +2374,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .rx_ring_free = ravb_rx_ring_free_rcar, .rx_ring_format = ravb_rx_ring_format_rcar, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, - .receive = ravb_rcar_rx, + .receive = ravb_rx_rcar, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, .dmac_init = ravb_dmac_init_rcar, @@ -2263,7 +2399,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .rx_ring_free = ravb_rx_ring_free_rcar, .rx_ring_format = ravb_rx_ring_format_rcar, .alloc_rx_desc = ravb_alloc_rx_desc_rcar, - .receive = ravb_rcar_rx, + .receive = ravb_rx_rcar, .set_rate = ravb_set_rate_rcar, .set_feature = ravb_set_features_rcar, .dmac_init = ravb_dmac_init_rcar, From b6a4ee6e74ded40bc261fee28f28a5df7021af3b Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:06 +0100 Subject: [PATCH 134/440] ravb: Add carrier_counters to struct ravb_hw_info RZ/G2L E-MAC supports carrier counters. Add a carrier_counter hw feature bit to struct ravb_hw_info to add this feature only for RZ/G2L. Signed-off-by: Biju Das Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 3 +++ drivers/net/ethernet/renesas/ravb_main.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 99164318db26..527e865dee81 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -196,6 +196,8 @@ enum ravb_reg { MAHR = 0x05c0, MALR = 0x05c8, TROCR = 0x0700, /* R-Car Gen3 and RZ/G2L only */ + CXR41 = 0x0708, /* RZ/G2L only */ + CXR42 = 0x0710, /* RZ/G2L only */ CEFCR = 0x0740, FRECR = 0x0748, TSFRCR = 0x0750, @@ -1017,6 +1019,7 @@ struct ravb_hw_info { /* hardware features */ unsigned internal_delay:1; /* AVB-DMAC has internal delays */ unsigned tx_counters:1; /* E-MAC has TX counters */ + unsigned carrier_counters:1; /* E-MAC has carrier counters */ unsigned multi_irqs:1; /* AVB-DMAC and E-MAC has multiple irqs */ unsigned gptp:1; /* AVB-DMAC has gPTP support */ unsigned ccc_gac:1; /* AVB-DMAC has gPTP support active in config mode */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1a5ebc58682d..2221a2565397 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2053,6 +2053,13 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev) ravb_write(ndev, 0, TROCR); /* (write clear) */ } + if (info->carrier_counters) { + nstats->collisions += ravb_read(ndev, CXR41); + ravb_write(ndev, 0, CXR41); /* (write clear) */ + nstats->tx_carrier_errors += ravb_read(ndev, CXR42); + ravb_write(ndev, 0, CXR42); /* (write clear) */ + } + nstats->rx_packets = stats0->rx_packets; nstats->tx_packets = stats0->tx_packets; nstats->rx_bytes = stats0->rx_bytes; @@ -2432,6 +2439,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .rx_max_buf_size = SZ_8K, .aligned_tx = 1, .tx_counters = 1, + .carrier_counters = 1, .half_duplex = 1, }; From 0ee65bc14ff2f755c83b9261c510a70bf6ba8d60 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:07 +0100 Subject: [PATCH 135/440] ravb: Add support to retrieve stats for GbEthernet Add support for retrieving stats information for GbEthernet. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2221a2565397..c984875e8ae0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1493,6 +1493,24 @@ static void ravb_set_msglevel(struct net_device *ndev, u32 value) priv->msg_enable = value; } +static const char ravb_gstrings_stats_gbeth[][ETH_GSTRING_LEN] = { + "rx_queue_0_current", + "tx_queue_0_current", + "rx_queue_0_dirty", + "tx_queue_0_dirty", + "rx_queue_0_packets", + "tx_queue_0_packets", + "rx_queue_0_bytes", + "tx_queue_0_bytes", + "rx_queue_0_mcast_packets", + "rx_queue_0_errors", + "rx_queue_0_crc_errors", + "rx_queue_0_frame_errors", + "rx_queue_0_length_errors", + "rx_queue_0_csum_offload_errors", + "rx_queue_0_over_errors", +}; + static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = { "rx_queue_0_current", "tx_queue_0_current", @@ -2434,6 +2452,9 @@ static const struct ravb_hw_info gbeth_hw_info = { .set_feature = ravb_set_features_gbeth, .dmac_init = ravb_dmac_init_gbeth, .emac_init = ravb_emac_init_gbeth, + .gstrings_stats = ravb_gstrings_stats_gbeth, + .gstrings_size = sizeof(ravb_gstrings_stats_gbeth), + .stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth), .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN), .tsrq = TCCR_TSRQ0, .rx_max_buf_size = SZ_8K, From 4ea3167bad275afda5a0b5a794831b312112ec29 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:08 +0100 Subject: [PATCH 136/440] ravb: Rename "tsrq" variable Rename the variable "tsrq" with "tccr_mask" as we are passing TCCR mask to the ravb_wait() function. There is no functional change. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 527e865dee81..99d666a5fb49 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1012,7 +1012,7 @@ struct ravb_hw_info { netdev_features_t net_features; int stats_len; size_t max_rx_len; - u32 tsrq; + u32 tccr_mask; u32 rx_max_buf_size; unsigned aligned_tx: 1; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c984875e8ae0..d9b5297e38c8 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1021,7 +1021,7 @@ static int ravb_stop_dma(struct net_device *ndev) int error; /* Wait for stopping the hardware TX process */ - error = ravb_wait(ndev, TCCR, info->tsrq, 0); + error = ravb_wait(ndev, TCCR, info->tccr_mask, 0); if (error) return error; @@ -2410,7 +2410,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, - .tsrq = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, .rx_max_buf_size = SZ_2K, .internal_delay = 1, .tx_counters = 1, @@ -2435,7 +2435,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, - .tsrq = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, .rx_max_buf_size = SZ_2K, .aligned_tx = 1, .gptp = 1, @@ -2456,7 +2456,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .gstrings_size = sizeof(ravb_gstrings_stats_gbeth), .stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth), .max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN), - .tsrq = TCCR_TSRQ0, + .tccr_mask = TCCR_TSRQ0, .rx_max_buf_size = SZ_8K, .aligned_tx = 1, .tx_counters = 1, From 030634f37db996f5b8cc82f43ed6e3b6b009d56c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:09 +0100 Subject: [PATCH 137/440] ravb: Optimize ravb_emac_init_gbeth function Optimize CXR31 register initialization on ravb_emac_init_gbeth function. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d9b5297e38c8..95ce92eff7eb 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -538,8 +538,7 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) /* E-MAC interrupt enable register */ ravb_write(ndev, ECSIPR_ICDIP, ECSIPR); - ravb_modify(ndev, CXR31, CXR31_SEL_LINK1, 0); - ravb_modify(ndev, CXR31, CXR31_SEL_LINK0, CXR31_SEL_LINK0); + ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, CXR31_SEL_LINK0); } static void ravb_emac_init_rcar(struct net_device *ndev) From 1091da579d7ccdba887bdfd11e31a23be694bbd0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:10 +0100 Subject: [PATCH 138/440] ravb: Rename "nc_queue" feature bit Rename the feature bit "nc_queue" with "nc_queues" as AVB DMAC has RX and TX NC queues. There is no functional change. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 36 ++++++++++++------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 99d666a5fb49..5d07732ef35a 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1023,7 +1023,7 @@ struct ravb_hw_info { unsigned multi_irqs:1; /* AVB-DMAC and E-MAC has multiple irqs */ unsigned gptp:1; /* AVB-DMAC has gPTP support */ unsigned ccc_gac:1; /* AVB-DMAC has gPTP support active in config mode */ - unsigned nc_queue:1; /* AVB-DMAC has NC queue */ + unsigned nc_queues:1; /* AVB-DMAC has RX and TX NC queues */ unsigned magic_pkt:1; /* E-MAC supports magic packet detection */ unsigned half_duplex:1; /* E-MAC supports half duplex mode */ }; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 95ce92eff7eb..21fb83f209d5 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1176,7 +1176,7 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id) result = IRQ_HANDLED; /* Network control and best effort queue RX/TX */ - if (info->nc_queue) { + if (info->nc_queues) { for (q = RAVB_NC; q >= RAVB_BE; q--) { if (ravb_queue_interrupt(ndev, q)) result = IRQ_HANDLED; @@ -1315,7 +1315,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Receive error message handling */ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; - if (info->nc_queue) + if (info->nc_queues) priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; if (priv->rx_over_errors != ndev->stats.rx_over_errors) ndev->stats.rx_over_errors = priv->rx_over_errors; @@ -1566,7 +1566,7 @@ static void ravb_get_ethtool_stats(struct net_device *ndev, int i = 0; int q; - num_rx_q = info->nc_queue ? NUM_RX_QUEUE : 1; + num_rx_q = info->nc_queues ? NUM_RX_QUEUE : 1; /* Device-specific stats */ for (q = RAVB_BE; q < num_rx_q; q++) { struct net_device_stats *stats = &priv->stats[q]; @@ -1643,7 +1643,7 @@ static int ravb_set_ringparam(struct net_device *ndev, /* Free all the skb's in the RX queue and the DMA buffers. */ ravb_ring_free(ndev, RAVB_BE); - if (info->nc_queue) + if (info->nc_queues) ravb_ring_free(ndev, RAVB_NC); } @@ -1763,7 +1763,7 @@ static int ravb_open(struct net_device *ndev) int error; napi_enable(&priv->napi[RAVB_BE]); - if (info->nc_queue) + if (info->nc_queues) napi_enable(&priv->napi[RAVB_NC]); if (!info->multi_irqs) { @@ -1838,7 +1838,7 @@ out_free_irq_emac: out_free_irq: free_irq(ndev->irq, ndev); out_napi_off: - if (info->nc_queue) + if (info->nc_queues) napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); return error; @@ -1888,7 +1888,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) } ravb_ring_free(ndev, RAVB_BE); - if (info->nc_queue) + if (info->nc_queues) ravb_ring_free(ndev, RAVB_NC); /* Device init */ @@ -2088,7 +2088,7 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev) nstats->rx_length_errors = stats0->rx_length_errors; nstats->rx_missed_errors = stats0->rx_missed_errors; nstats->rx_over_errors = stats0->rx_over_errors; - if (info->nc_queue) { + if (info->nc_queues) { stats1 = &priv->stats[RAVB_NC]; nstats->rx_packets += stats1->rx_packets; @@ -2169,13 +2169,13 @@ static int ravb_close(struct net_device *ndev) } free_irq(ndev->irq, ndev); - if (info->nc_queue) + if (info->nc_queues) napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); /* Free all the skb's in the RX queue and the DMA buffers. */ ravb_ring_free(ndev, RAVB_BE); - if (info->nc_queue) + if (info->nc_queues) ravb_ring_free(ndev, RAVB_NC); return 0; @@ -2415,7 +2415,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .tx_counters = 1, .multi_irqs = 1, .ccc_gac = 1, - .nc_queue = 1, + .nc_queues = 1, .magic_pkt = 1, }; @@ -2438,7 +2438,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .rx_max_buf_size = SZ_2K, .aligned_tx = 1, .gptp = 1, - .nc_queue = 1, + .nc_queues = 1, .magic_pkt = 1, }; @@ -2618,7 +2618,7 @@ static int ravb_probe(struct platform_device *pdev) priv->pdev = pdev; priv->num_tx_ring[RAVB_BE] = BE_TX_RING_SIZE; priv->num_rx_ring[RAVB_BE] = BE_RX_RING_SIZE; - if (info->nc_queue) { + if (info->nc_queues) { priv->num_tx_ring[RAVB_NC] = NC_TX_RING_SIZE; priv->num_rx_ring[RAVB_NC] = NC_RX_RING_SIZE; } @@ -2754,7 +2754,7 @@ static int ravb_probe(struct platform_device *pdev) } netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll, 64); - if (info->nc_queue) + if (info->nc_queues) netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll, 64); /* Network device register */ @@ -2773,7 +2773,7 @@ static int ravb_probe(struct platform_device *pdev) return 0; out_napi_del: - if (info->nc_queue) + if (info->nc_queues) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); @@ -2814,7 +2814,7 @@ static int ravb_remove(struct platform_device *pdev) ravb_write(ndev, CCC_OPC_RESET, CCC); pm_runtime_put_sync(&pdev->dev); unregister_netdev(ndev); - if (info->nc_queue) + if (info->nc_queues) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); ravb_mdio_release(priv); @@ -2838,7 +2838,7 @@ static int ravb_wol_setup(struct net_device *ndev) /* Only allow ECI interrupts */ synchronize_irq(priv->emac_irq); - if (info->nc_queue) + if (info->nc_queues) napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); ravb_write(ndev, ECSIPR_MPDIP, ECSIPR); @@ -2855,7 +2855,7 @@ static int ravb_wol_restore(struct net_device *ndev) const struct ravb_hw_info *info = priv->info; int ret; - if (info->nc_queue) + if (info->nc_queues) napi_enable(&priv->napi[RAVB_NC]); napi_enable(&priv->napi[RAVB_BE]); From 95e99b10482ddc4eecfbef8247d81646676d5c46 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:11 +0100 Subject: [PATCH 139/440] ravb: Document PFRI register bit Document PFRI register bit, as it is documented on R-Car Gen3 and RZ/G2L hardware manuals. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 5d07732ef35a..69a771526776 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -828,7 +828,7 @@ enum ECSR_BIT { ECSR_MPD = 0x00000002, ECSR_LCHNG = 0x00000004, ECSR_PHYI = 0x00000008, - ECSR_PFRI = 0x00000010, + ECSR_PFRI = 0x00000010, /* Documented for R-Car Gen3 and RZ/G2L */ }; /* ECSIPR */ From 3d6b24a2ada3d53f7da9362d03856bdec74806e5 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:12 +0100 Subject: [PATCH 140/440] ravb: Update ravb_emac_init_gbeth() This patch enables Receive/Transmit port of TOE and removes the setting of promiscuous bit from EMAC configuration mode register. This patch also update EMAC configuration mode comment from "PAUSE prohibition" to "EMAC Mode: PAUSE prohibition; Duplex; TX; RX; CRC Pass Through". Signed-off-by: Biju Das Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 6 ++++++ drivers/net/ethernet/renesas/ravb_main.c | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 69a771526776..08062d73df10 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -204,6 +204,7 @@ enum ravb_reg { TLFRCR = 0x0758, RFCR = 0x0760, MAFCR = 0x0778, + CSR0 = 0x0800, /* RZ/G2L only */ }; @@ -964,6 +965,11 @@ enum CXR31_BIT { CXR31_SEL_LINK1 = 0x00000008, }; +enum CSR0_BIT { + CSR0_TPE = 0x00000010, + CSR0_RPE = 0x00000020, +}; + #define DBAT_ENTRY_NUM 22 #define RX_QUEUE_OFFSET 4 #define NUM_RX_QUEUE 2 diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 21fb83f209d5..f3f676e433d1 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -519,10 +519,10 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) /* Receive frame limit set register */ ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR); - /* PAUSE prohibition */ + /* EMAC Mode: PAUSE prohibition; Duplex; TX; RX; CRC Pass Through */ ravb_write(ndev, ECMR_ZPF | ((priv->duplex > 0) ? ECMR_DM : 0) | ECMR_TE | ECMR_RE | ECMR_RCPT | - ECMR_TXF | ECMR_RXF | ECMR_PRM, ECMR); + ECMR_TXF | ECMR_RXF, ECMR); ravb_set_rate_gbeth(ndev); @@ -534,6 +534,7 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) /* E-MAC status register clear */ ravb_write(ndev, ECSR_ICD | ECSR_LCHNG | ECSR_PFRI, ECSR); + ravb_write(ndev, CSR0_TPE | CSR0_RPE, CSR0); /* E-MAC interrupt enable register */ ravb_write(ndev, ECSIPR_ICDIP, ECSIPR); From 9404092646473180ef16198f0502da050fc0fa61 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Oct 2021 17:36:13 +0100 Subject: [PATCH 141/440] ravb: Fix typo AVB->DMAC Fix the typo AVB->DMAC in comment, as the code following the comment is for DMAC on Gigabit Ethernet IP. Signed-off-by: Biju Das Suggested-by: Sergey Shtylyov Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f3f676e433d1..e5243cc87a19 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -588,7 +588,7 @@ static int ravb_dmac_init_gbeth(struct net_device *ndev) /* Descriptor format */ ravb_ring_format(ndev, RAVB_BE); - /* Set AVB RX */ + /* Set DMAC RX */ ravb_write(ndev, 0x60000000, RCR); /* Set Max Frame Length (RTC) */ From c045ad2cc01e473b98771379eb58fe13560b7f70 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:35 -0700 Subject: [PATCH 142/440] ax25: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in AX25 constant. Modify callers as well (netrom, rose). Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 13 +++++++------ net/ax25/af_ax25.c | 2 +- net/ax25/ax25_dev.c | 2 +- net/ax25/ax25_iface.c | 6 +++--- net/ax25/ax25_in.c | 4 ++-- net/ax25/ax25_out.c | 2 +- net/netrom/af_netrom.c | 4 ++-- net/netrom/nr_dev.c | 6 +++--- net/netrom/nr_route.c | 4 ++-- net/rose/rose_link.c | 8 ++++---- 10 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 8b7eb46ad72d..03d409de61ad 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -304,7 +304,7 @@ extern spinlock_t ax25_list_lock; void ax25_cb_add(ax25_cb *); struct sock *ax25_find_listener(ax25_address *, int, struct net_device *, int); struct sock *ax25_get_socket(ax25_address *, ax25_address *, int); -ax25_cb *ax25_find_cb(ax25_address *, ax25_address *, ax25_digi *, +ax25_cb *ax25_find_cb(const ax25_address *, ax25_address *, ax25_digi *, struct net_device *); void ax25_send_to_raw(ax25_address *, struct sk_buff *, int); void ax25_destroy_socket(ax25_cb *); @@ -384,10 +384,11 @@ struct ax25_linkfail { void ax25_linkfail_register(struct ax25_linkfail *lf); void ax25_linkfail_release(struct ax25_linkfail *lf); -int __must_check ax25_listen_register(ax25_address *, struct net_device *); -void ax25_listen_release(ax25_address *, struct net_device *); +int __must_check ax25_listen_register(const ax25_address *, + struct net_device *); +void ax25_listen_release(const ax25_address *, struct net_device *); int(*ax25_protocol_function(unsigned int))(struct sk_buff *, ax25_cb *); -int ax25_listen_mine(ax25_address *, struct net_device *); +int ax25_listen_mine(const ax25_address *, struct net_device *); void ax25_link_failed(ax25_cb *, int); int ax25_protocol_is_registered(unsigned int); @@ -401,8 +402,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb); extern const struct header_ops ax25_header_ops; /* ax25_out.c */ -ax25_cb *ax25_send_frame(struct sk_buff *, int, ax25_address *, ax25_address *, - ax25_digi *, struct net_device *); +ax25_cb *ax25_send_frame(struct sk_buff *, int, const ax25_address *, + ax25_address *, ax25_digi *, struct net_device *); void ax25_output(ax25_cb *, int, struct sk_buff *); void ax25_kick(ax25_cb *); void ax25_transmit_buffer(ax25_cb *, struct sk_buff *, int); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2631efc6e359..2f34bbdde0e8 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -202,7 +202,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, * Find an AX.25 control block given both ends. It will only pick up * floating AX.25 control blocks or non Raw socket bound control blocks. */ -ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, +ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 4ac2e0847652..d0a043a51848 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -35,7 +35,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) spin_lock_bh(&ax25_dev_lock); for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) - if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { + if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; } spin_unlock_bh(&ax25_dev_lock); diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index b4083f30af0d..979bc4b828a0 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -98,7 +98,7 @@ void ax25_linkfail_release(struct ax25_linkfail *lf) EXPORT_SYMBOL(ax25_linkfail_release); -int ax25_listen_register(ax25_address *callsign, struct net_device *dev) +int ax25_listen_register(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; @@ -121,7 +121,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) EXPORT_SYMBOL(ax25_listen_register); -void ax25_listen_release(ax25_address *callsign, struct net_device *dev) +void ax25_listen_release(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *s, *listen; @@ -171,7 +171,7 @@ int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) return res; } -int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) +int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index cd6afe895db9..1cac25aca637 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -181,7 +181,7 @@ static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, i } static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, - ax25_address *dev_addr, struct packet_type *ptype) + const ax25_address *dev_addr, struct packet_type *ptype) { ax25_address src, dest, *next_digi = NULL; int type = 0, mine = 0, dama; @@ -447,5 +447,5 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull(skb, AX25_KISS_HEADER_LEN); /* Remove the KISS byte */ - return ax25_rcv(skb, dev, (ax25_address *)dev->dev_addr, ptype); + return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 22f2f66c6e0a..3db76d2470e9 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(ax25_frag_lock); -ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) +ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *ax25; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 6d16e1ab1a8a..775064cdd0ee 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -633,7 +633,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; - ax25_address *source = NULL; + const ax25_address *source = NULL; ax25_uid_assoc *user; struct net_device *dev; int err = 0; @@ -673,7 +673,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENETUNREACH; goto out_release; } - source = (ax25_address *)dev->dev_addr; + source = (const ax25_address *)dev->dev_addr; user = ax25_findbyuid(current_euid()); if (user) { diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index d1ca413d3317..3aaac4a22b38 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -108,7 +108,7 @@ static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); } dev_addr_set(dev, sa->sa_data); @@ -120,7 +120,7 @@ static int nr_open(struct net_device *dev) { int err; - err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); if (err) return err; @@ -131,7 +131,7 @@ static int nr_open(struct net_device *dev) static int nr_close(struct net_device *dev) { - ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); netif_stop_queue(dev); return 0; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index ddd5cbd455e3..baea3cbd76ca 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -598,7 +598,7 @@ struct net_device *nr_dev_get(ax25_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && - ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -825,7 +825,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) ax25s = nr_neigh->ax25; nr_neigh->ax25 = ax25_send_frame(skb, 256, - (ax25_address *)dev->dev_addr, + (const ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); if (ax25s) diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index f6102e6f5161..8b96a56d3a49 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -94,11 +94,11 @@ static void rose_t0timer_expiry(struct timer_list *t) */ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { - ax25_address *rose_call; + const ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (ax25_address *)neigh->dev->dev_addr; + rose_call = (const ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; @@ -117,11 +117,11 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) */ static int rose_link_up(struct rose_neigh *neigh) { - ax25_address *rose_call; + const ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (ax25_address *)neigh->dev->dev_addr; + rose_call = (const ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; From db95732446a86e310dbbe5f890fae6f0c719833b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:36 -0700 Subject: [PATCH 143/440] rose: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in rose constant. Signed-off-by: Jakub Kicinski --- include/net/rose.h | 8 ++++---- net/rose/af_rose.c | 5 +++-- net/rose/rose_dev.c | 6 +++--- net/rose/rose_route.c | 10 ++++++---- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index cf517d306a28..0f0a4ce0fee7 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -162,8 +162,8 @@ extern int sysctl_rose_link_fail_timeout; extern int sysctl_rose_maximum_vcs; extern int sysctl_rose_window_size; -int rosecmp(rose_address *, rose_address *); -int rosecmpm(rose_address *, rose_address *, unsigned short); +int rosecmp(const rose_address *, const rose_address *); +int rosecmpm(const rose_address *, const rose_address *, unsigned short); char *rose2asc(char *buf, const rose_address *); struct sock *rose_find_socket(unsigned int, struct rose_neigh *); void rose_kill_by_neigh(struct rose_neigh *); @@ -205,8 +205,8 @@ extern const struct seq_operations rose_node_seqops; extern struct seq_operations rose_route_seqops; void rose_add_loopback_neigh(void); -int __must_check rose_add_loopback_node(rose_address *); -void rose_del_loopback_node(rose_address *); +int __must_check rose_add_loopback_node(const rose_address *); +void rose_del_loopback_node(const rose_address *); void rose_rt_device_down(struct net_device *); void rose_link_device_down(struct net_device *); struct net_device *rose_dev_first(void); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf7d974e0f61..30a1cf4c16c6 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -109,7 +109,7 @@ char *rose2asc(char *buf, const rose_address *addr) /* * Compare two ROSE addresses, 0 == equal. */ -int rosecmp(rose_address *addr1, rose_address *addr2) +int rosecmp(const rose_address *addr1, const rose_address *addr2) { int i; @@ -123,7 +123,8 @@ int rosecmp(rose_address *addr1, rose_address *addr2) /* * Compare two ROSE addresses for only mask digits, 0 == equal. */ -int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask) +int rosecmpm(const rose_address *addr1, const rose_address *addr2, + unsigned short mask) { unsigned int i, j; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 2a35e188b389..f1a76a5820f1 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -66,7 +66,7 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - rose_del_loopback_node((rose_address *)dev->dev_addr); + rose_del_loopback_node((const rose_address *)dev->dev_addr); } dev_addr_set(dev, sa->sa_data); @@ -78,7 +78,7 @@ static int rose_open(struct net_device *dev) { int err; - err = rose_add_loopback_node((rose_address *)dev->dev_addr); + err = rose_add_loopback_node((const rose_address *)dev->dev_addr); if (err) return err; @@ -90,7 +90,7 @@ static int rose_open(struct net_device *dev) static int rose_close(struct net_device *dev) { netif_stop_queue(dev); - rose_del_loopback_node((rose_address *)dev->dev_addr); + rose_del_loopback_node((const rose_address *)dev->dev_addr); return 0; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index c0e04c261a15..e2e6b6b78578 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -401,7 +401,7 @@ void rose_add_loopback_neigh(void) /* * Add a loopback node. */ -int rose_add_loopback_node(rose_address *address) +int rose_add_loopback_node(const rose_address *address) { struct rose_node *rose_node; int err = 0; @@ -446,7 +446,7 @@ out: /* * Delete a loopback node. */ -void rose_del_loopback_node(rose_address *address) +void rose_del_loopback_node(const rose_address *address) { struct rose_node *rose_node; @@ -629,7 +629,8 @@ struct net_device *rose_dev_get(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && + rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -646,7 +647,8 @@ static int rose_dev_exists(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && + rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; From 2ef6db76bac0f3006daceb9eeeaf5f09820b1caf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:37 -0700 Subject: [PATCH 144/440] llc/snap: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in LLC and SNAP constant. Signed-off-by: Jakub Kicinski --- include/net/datalink.h | 2 +- include/net/llc.h | 2 +- include/net/llc_if.h | 3 ++- net/802/p8022.c | 2 +- net/802/psnap.c | 2 +- net/llc/llc_c_ac.c | 2 +- net/llc/llc_if.c | 2 +- net/llc/llc_output.c | 2 +- net/llc/llc_proc.c | 2 +- 9 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/net/datalink.h b/include/net/datalink.h index a9663229b913..d9b7faaa539f 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -12,7 +12,7 @@ struct datalink_proto { int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, - unsigned char *); + const unsigned char *); struct list_head node; }; diff --git a/include/net/llc.h b/include/net/llc.h index df282d9b4017..fd1f9a3fd8dd 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -133,7 +133,7 @@ static inline void llc_sap_put(struct llc_sap *sap) struct llc_sap *llc_sap_find(unsigned char sap_value); int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, - unsigned char *dmac, unsigned char dsap); + const unsigned char *dmac, unsigned char dsap); void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); diff --git a/include/net/llc_if.h b/include/net/llc_if.h index 8d5c543cd620..c72570a21a4f 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -62,7 +62,8 @@ #define LLC_STATUS_CONFLICT 7 /* disconnect conn */ #define LLC_STATUS_RESET_DONE 8 /* */ -int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap); +int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, + u8 dsap); int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb); int llc_send_disc(struct sock *sk); #endif /* LLC_IF_H */ diff --git a/net/802/p8022.c b/net/802/p8022.c index a6585627051d..79c23173116c 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -23,7 +23,7 @@ #include static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, - unsigned char *dest) + const unsigned char *dest) { llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap); return 0; diff --git a/net/802/psnap.c b/net/802/psnap.c index 4492e8d7ad20..1406bfdbda13 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -79,7 +79,7 @@ drop: * Put a SNAP header on a frame and pass to 802.2 */ static int snap_request(struct datalink_proto *dl, - struct sk_buff *skb, u8 *dest) + struct sk_buff *skb, const u8 *dest) { memcpy(skb_push(skb, 5), dl->type, 5); llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 647c0554d04c..40ca3c1e42a2 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -781,7 +781,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 *dmac = llc->daddr.mac; + const u8 *dmac = llc->daddr.mac; if (llc->dev->flags & IFF_LOOPBACK) dmac = llc->dev->dev_addr; diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index ad6547736c21..dde9bf08a593 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -80,7 +80,7 @@ out_free: * establishment will inform to upper layer via calling it's confirm * function and passing proper information. */ -int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap) +int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap) { int rc = -EISCONN; struct llc_addr laddr, daddr; diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index b9ad087bcbd7..5a6466fc626a 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -56,7 +56,7 @@ int llc_mac_hdr_init(struct sk_buff *skb, * package primitive as an event and send to SAP event handler */ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, - unsigned char *dmac, unsigned char dsap) + const unsigned char *dmac, unsigned char dsap) { int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap, diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a4eccb98220a..0ff490a73fae 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -26,7 +26,7 @@ #include #include -static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) +static void llc_ui_format_mac(struct seq_file *seq, const u8 *addr) { seq_printf(seq, "%pM", addr); } From 1a8a23d2da4fec6f090ec26bbe76eab2b77410e9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:38 -0700 Subject: [PATCH 145/440] ipv6: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in ndisc constant. Signed-off-by: Jakub Kicinski --- include/net/ndisc.h | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 38e4094960ce..04341d86585d 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -137,7 +137,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts); -void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad); #define NDISC_OPS_REDIRECT_DATA_SPACE 2 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c6a90b7bbb70..d4fae16deec4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2237,12 +2237,12 @@ static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev) static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) { - union fwnet_hwaddr *ha; + const union fwnet_hwaddr *ha; if (dev->addr_len != FWNET_ALEN) return -1; - ha = (union fwnet_hwaddr *)dev->dev_addr; + ha = (const union fwnet_hwaddr *)dev->dev_addr; memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); eui[0] ^= 2; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4b098521a44c..184190b9ea25 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -142,7 +142,7 @@ struct neigh_table nd_tbl = { }; EXPORT_SYMBOL_GPL(nd_tbl); -void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); @@ -165,7 +165,7 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, - void *data, u8 icmp6_type) + const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); From 6cf862807234fa8d7c91a3a2d327a87471b9adbd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:39 -0700 Subject: [PATCH 146/440] tipc: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in tipc constant. Signed-off-by: Jakub Kicinski --- net/tipc/bearer.c | 4 ++-- net/tipc/bearer.h | 2 +- net/tipc/eth_media.c | 2 +- net/tipc/ib_media.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 443f8e5b9477..60bc74b76adc 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -462,7 +462,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; - b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } @@ -703,7 +703,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (const char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 57c6a1a719e2..490ad6e5f7a3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -117,7 +117,7 @@ struct tipc_media { char *msg); int (*raw2addr)(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *raw); + const char *raw); u32 priority; u32 tolerance; u32 min_win; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index c68019697cfe..cb0d185e06af 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -60,7 +60,7 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw mac address format to media addr format */ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 7aa9ff88458d..b9ad0434c3cd 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -67,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw InfiniBand address format to media addr format */ static int tipc_ib_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { memset(addr, 0, sizeof(*addr)); memcpy(addr->value, msg, INFINIBAND_ALEN); From 1bfcd1cc546eaa9ecafb9718bc877f24b8a349c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 08:58:40 -0700 Subject: [PATCH 147/440] decnet: constify dev_addr passing In preparation for netdev->dev_addr being constant make all relevant arguments in decnet constant. Signed-off-by: Jakub Kicinski --- include/net/dn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/dn.h b/include/net/dn.h index 56ab0726c641..ba9655b0098a 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -166,7 +166,7 @@ struct dn_skb_cb { int iif; }; -static inline __le16 dn_eth2dn(unsigned char *ethaddr) +static inline __le16 dn_eth2dn(const unsigned char *ethaddr) { return get_unaligned((__le16 *)(ethaddr + 4)); } From 40af35fdf79c77e19c597e47cc8fe9a8e200de30 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 09:06:32 -0700 Subject: [PATCH 148/440] netdevice: demote the type of some dev_addr_set() helpers __dev_addr_set() and dev_addr_mod() and pretty low level, let the arguments be void, there's no chance for confusion in callers converted to use them. Keep u8 in dev_addr_set() because some of the callers are converted from a loop and we want to make sure assignments are not from an array of a different type. Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0723c1314ea2..f33af341bfb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4643,7 +4643,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ static inline void -__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len) +__dev_addr_set(struct net_device *dev, const void *addr, size_t len) { memcpy(dev->dev_addr, addr, len); } @@ -4655,7 +4655,7 @@ static inline void dev_addr_set(struct net_device *dev, const u8 *addr) static inline void dev_addr_mod(struct net_device *dev, unsigned int offset, - const u8 *addr, size_t len) + const void *addr, size_t len) { memcpy(&dev->dev_addr[offset], addr, len); } From 20c3d9e45ba630a7156d682a40988c0e96be1b92 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 09:06:33 -0700 Subject: [PATCH 149/440] hamradio: use dev_addr_set() for setting device address Use dev_addr_set() instead of writing to netdev->dev_addr directly in hamradio drivers. Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/6pack.c | 6 +++--- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/net/hamradio/bpqether.c | 5 ++--- drivers/net/hamradio/dmascc.c | 2 +- drivers/net/hamradio/hdlcdrv.c | 2 +- drivers/net/hamradio/mkiss.c | 6 +++--- drivers/net/hamradio/scc.c | 5 ++--- drivers/net/hamradio/yam.c | 2 +- 8 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 6192244b304a..f4e8793e995d 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -288,7 +288,7 @@ static int sp_set_mac_address(struct net_device *dev, void *addr) netif_tx_lock_bh(dev); netif_addr_lock(dev); - memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); + __dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); @@ -317,7 +317,7 @@ static void sp_setup(struct net_device *dev) /* Only activated in AX.25 mode */ memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->flags = 0; } @@ -726,7 +726,7 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file, } netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); + __dev_addr_set(dev, &addr, AX25_ADDR_LEN); netif_tx_unlock_bh(dev); err = 0; break; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index f6428ff780ab..62da837cc707 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -1159,7 +1159,7 @@ static void baycom_probe(struct net_device *dev) dev->mtu = AX25_DEF_PACLEN; /* eth_mtu is the default */ dev->addr_len = AX25_ADDR_LEN; /* sizeof an ax.25 address */ memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &null_ax25_address, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&null_ax25_address); dev->tx_queue_len = 16; /* New style flags */ diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 1ac816164a9b..30af0081e2be 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -457,9 +457,6 @@ static void bpq_setup(struct net_device *dev) dev->netdev_ops = &bpq_netdev_ops; dev->needs_free_netdev = true; - memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); - dev->flags = 0; dev->features = NETIF_F_LLTX; /* Allow recursion */ @@ -472,6 +469,8 @@ static void bpq_setup(struct net_device *dev) dev->mtu = AX25_DEF_PACLEN; dev->addr_len = AX25_ADDR_LEN; + memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); } /* diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index a4ba1b79d2d0..7e527499d3ad 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -426,7 +426,7 @@ static void __init dev_setup(struct net_device *dev) dev->addr_len = AX25_ADDR_LEN; dev->tx_queue_len = 64; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); } static const struct net_device_ops scc_netdev_ops = { diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index ab5327acec22..b0edb91bb10a 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -675,7 +675,7 @@ static void hdlcdrv_setup(struct net_device *dev) dev->mtu = AX25_DEF_PACLEN; /* eth_mtu is the default */ dev->addr_len = AX25_ADDR_LEN; /* sizeof an ax.25 address */ memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->tx_queue_len = 16; } diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 8666110bec55..867252a0247b 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -344,7 +344,7 @@ static int ax_set_mac_address(struct net_device *dev, void *addr) netif_tx_lock_bh(dev); netif_addr_lock(dev); - memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); + __dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); @@ -647,7 +647,7 @@ static void ax_setup(struct net_device *dev) memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->flags = IFF_BROADCAST | IFF_MULTICAST; } @@ -850,7 +850,7 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file, } netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, addr, AX25_ADDR_LEN); + __dev_addr_set(dev, addr, AX25_ADDR_LEN); netif_tx_unlock_bh(dev); err = 0; diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 9b745d09d327..3d59dac063ac 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1563,9 +1563,6 @@ static void scc_net_setup(struct net_device *dev) dev->netdev_ops = &scc_netdev_ops; dev->header_ops = &ax25_header_ops; - memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); - dev->flags = 0; dev->type = ARPHRD_AX25; @@ -1573,6 +1570,8 @@ static void scc_net_setup(struct net_device *dev) dev->mtu = AX25_DEF_PACLEN; dev->addr_len = AX25_ADDR_LEN; + memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); } /* ----> open network device <---- */ diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index e557ccf98bcf..6376b8485976 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1107,7 +1107,7 @@ static void yam_setup(struct net_device *dev) dev->mtu = AX25_MTU; dev->addr_len = AX25_ADDR_LEN; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); - memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); + dev_addr_set(dev, (u8 *)&ax25_defaddr); } static int __init yam_init_driver(void) From 5a1b7e1a532557e3af48c3ed7e61f3e757ee09ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 09:06:34 -0700 Subject: [PATCH 150/440] ip: use dev_addr_set() in tunnels Use dev_addr_set() instead of writing to netdev->dev_addr directly in ip tunnels drivers. Signed-off-by: Jakub Kicinski --- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv6/ip6_gre.c | 4 ++-- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6_vti.c | 2 +- net/ipv6/sit.c | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0fe6c936dc54..2ac2b95c5694 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -986,7 +986,7 @@ static int ipgre_tunnel_init(struct net_device *dev) __gre_tunnel_init(dev); - memcpy(dev->dev_addr, &iph->saddr, 4); + __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fe9101d3d69e..5a473319d3a5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -834,7 +834,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p->iph.saddr, 4); + __dev_addr_set(dev, &p->iph.saddr, 4); memcpy(dev->broadcast, &p->iph.daddr, 4); } ip_tunnel_add(itn, t); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index efe25a0172e6..8c2bd1d9ddce 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -425,7 +425,7 @@ static int vti_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - memcpy(dev->dev_addr, &iph->saddr, 4); + __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 3aa78ccbec3e..123ea63a04cb 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -380,7 +380,7 @@ static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); + __dev_addr_set(dev, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); tunnel->tun_hlen = 0; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3ad201d372d8..d831d2439693 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1088,7 +1088,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) struct flowi6 *fl6 = &t->fl.u.ip6; if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); } @@ -1521,7 +1521,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (tunnel->parms.collect_md) return 0; - memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 20a67efda47f..484aca492cc0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1449,7 +1449,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) unsigned int mtu; int t_hlen; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1d8e3ffa225d..527e9ead7449 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -660,7 +660,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) struct net_device *tdev = NULL; int mtu; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ef0c7a7c18e2..1b57ee36d668 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -204,7 +204,7 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) @@ -1149,7 +1149,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); + __dev_addr_set(t->dev, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; From 13b5ffa0e282f3d18e57735e37b8fed3a392872b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Oct 2021 07:27:57 -0700 Subject: [PATCH 151/440] net: remove single-byte netdev->dev_addr writes Make the drivers which use single-byte netdev addresses (netdev->addr_len == 1) use the appropriate address setting helpers. arcnet copies from int variables and io reads a lot, so add a helper for arcnet drivers to use. Similar helper could be reused for phonet and appletalk but there isn't any good central location where we could put it, and netdevice.h is already very crowded. Acked-by: Sebastian Reichel # for HSI Link: https://lore.kernel.org/r/20211012142757.4124842-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/hsi/clients/ssi_protocol.c | 4 +++- drivers/net/appletalk/cops.c | 2 +- drivers/net/appletalk/ltpc.c | 3 +-- drivers/net/arcnet/arc-rimi.c | 5 +++-- drivers/net/arcnet/arcdevice.h | 5 +++++ drivers/net/arcnet/com20020-isa.c | 2 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020.c | 4 ++-- drivers/net/arcnet/com20020_cs.c | 2 +- drivers/net/arcnet/com90io.c | 2 +- drivers/net/arcnet/com90xx.c | 3 ++- drivers/net/usb/cdc-phonet.c | 4 +++- drivers/usb/gadget/function/f_phonet.c | 5 ++++- 13 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 96d0eccca3aa..21f11a5b965b 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -1055,14 +1055,16 @@ static const struct net_device_ops ssip_pn_ops = { static void ssip_pn_setup(struct net_device *dev) { + static const u8 addr = PN_MEDIA_SOS; + dev->features = 0; dev->netdev_ops = &ssip_pn_ops; dev->type = ARPHRD_PHONET; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = SSIP_DEFAULT_MTU; dev->hard_header_len = 1; - dev->dev_addr[0] = PN_MEDIA_SOS; dev->addr_len = 1; + dev_addr_set(dev, &addr); dev->tx_queue_len = SSIP_TXQUEUE_LEN; dev->needs_free_netdev = true; diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index f0695d68c47e..97f254bdbb16 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -945,8 +945,8 @@ static int cops_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) dev->broadcast[0] = 0xFF; /* Set hardware address. */ - dev->dev_addr[0] = aa->s_node; dev->addr_len = 1; + dev_addr_set(dev, &aa->s_node); return 0; case SIOCGIFADDR: diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 1f8925e75b3f..388d7b3bd4c2 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -846,9 +846,8 @@ static int ltpc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) set_30 (dev,ltflags); dev->broadcast[0] = 0xFF; - dev->dev_addr[0] = aa->s_node; - dev->addr_len=1; + dev_addr_set(dev, &aa->s_node); return 0; diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 12d085405bd0..8c3ccc7c83cd 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -207,7 +207,8 @@ static int __init arcrimi_found(struct net_device *dev) } /* get and check the station ID from offset 1 in shmem */ - dev->dev_addr[0] = arcnet_readb(lp->mem_start, COM9026_REG_R_STATION); + arcnet_set_addr(dev, arcnet_readb(lp->mem_start, + COM9026_REG_R_STATION)); arc_printk(D_NORMAL, dev, "ARCnet RIM I: station %02Xh found at IRQ %d, ShMem %lXh (%ld*%d bytes)\n", dev->dev_addr[0], @@ -324,7 +325,7 @@ static int __init arc_rimi_init(void) return -ENOMEM; if (node && node != 0xff) - dev->dev_addr[0] = node; + arcnet_set_addr(dev, node); dev->mem_start = io; dev->irq = irq; diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 5d4a4c7efbbf..19e996a829c9 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -364,6 +364,11 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, struct net_device *dev); void arcnet_timeout(struct net_device *dev, unsigned int txqueue); +static inline void arcnet_set_addr(struct net_device *dev, u8 addr) +{ + dev_addr_set(dev, &addr); +} + /* I/O equivalents */ #ifdef CONFIG_SA1100_CT6001 diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index be618e4b9ed5..293a621e654c 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -151,7 +151,7 @@ static int __init com20020_init(void) return -ENOMEM; if (node && node != 0xff) - dev->dev_addr[0] = node; + arcnet_set_addr(dev, node); dev->netdev_ops = &com20020_netdev_ops; diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 3c8f665c1558..6382e1937cca 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -194,7 +194,7 @@ static int com20020pci_probe(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); dev->base_addr = ioaddr; - dev->dev_addr[0] = node; + arcnet_set_addr(dev, node); dev->sysfs_groups[0] = &com20020_state_group; dev->irq = pdev->irq; lp->card_name = "PCI COM20020"; diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 78043a9c5981..06e1651b594b 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -157,7 +157,7 @@ static int com20020_set_hwaddr(struct net_device *dev, void *addr) struct arcnet_local *lp = netdev_priv(dev); struct sockaddr *hwaddr = addr; - memcpy(dev->dev_addr, hwaddr->sa_data, 1); + dev_addr_set(dev, hwaddr->sa_data); com20020_set_subaddress(lp, ioaddr, SUB_NODE); arcnet_outb(dev->dev_addr[0], ioaddr, COM20020_REG_W_XREG); @@ -220,7 +220,7 @@ int com20020_found(struct net_device *dev, int shared) /* FIXME: do this some other way! */ if (!dev->dev_addr[0]) - dev->dev_addr[0] = arcnet_inb(ioaddr, 8); + arcnet_set_addr(dev, arcnet_inb(ioaddr, 8)); com20020_set_subaddress(lp, ioaddr, SUB_SETUP1); arcnet_outb(lp->setup, ioaddr, COM20020_REG_W_XREG); diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index b88a109b3b15..24150c933fcb 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -133,7 +133,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) lp->hw.owner = THIS_MODULE; /* fill in our module parameters as defaults */ - dev->dev_addr[0] = node; + arcnet_set_addr(dev, node); p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; p_dev->resource[0]->end = 16; diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 3856b447d38e..37b47749fc8b 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -252,7 +252,7 @@ static int __init com90io_found(struct net_device *dev) /* get and check the station ID from offset 1 in shmem */ - dev->dev_addr[0] = get_buffer_byte(dev, 1); + arcnet_set_addr(dev, get_buffer_byte(dev, 1)); err = register_netdev(dev); if (err) { diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index d8dfb9ea0de8..f49dae194284 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -531,7 +531,8 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, } /* get and check the station ID from offset 1 in shmem */ - dev->dev_addr[0] = arcnet_readb(lp->mem_start, COM9026_REG_R_STATION); + arcnet_set_addr(dev, arcnet_readb(lp->mem_start, + COM9026_REG_R_STATION)); dev->base_addr = ioaddr; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index e1da9102a540..ad5121e9cf5d 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -275,6 +275,8 @@ static const struct net_device_ops usbpn_ops = { static void usbpn_setup(struct net_device *dev) { + const u8 addr = PN_MEDIA_USB; + dev->features = 0; dev->netdev_ops = &usbpn_ops; dev->header_ops = &phonet_header_ops; @@ -284,8 +286,8 @@ static void usbpn_setup(struct net_device *dev) dev->min_mtu = PHONET_MIN_MTU; dev->max_mtu = PHONET_MAX_MTU; dev->hard_header_len = 1; - dev->dev_addr[0] = PN_MEDIA_USB; dev->addr_len = 1; + dev_addr_set(dev, &addr); dev->tx_queue_len = 3; dev->needs_free_netdev = true; diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index 0b468f5d55bc..068ed8417e5a 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -267,6 +267,8 @@ static const struct net_device_ops pn_netdev_ops = { static void pn_net_setup(struct net_device *dev) { + const u8 addr = PN_MEDIA_USB; + dev->features = 0; dev->type = ARPHRD_PHONET; dev->flags = IFF_POINTOPOINT | IFF_NOARP; @@ -274,8 +276,9 @@ static void pn_net_setup(struct net_device *dev) dev->min_mtu = PHONET_MIN_MTU; dev->max_mtu = PHONET_MAX_MTU; dev->hard_header_len = 1; - dev->dev_addr[0] = PN_MEDIA_USB; dev->addr_len = 1; + dev_addr_set(dev, &addr); + dev->tx_queue_len = 1; dev->netdev_ops = &pn_netdev_ops; From 6312d52838b21f5c4a5afa1269a00df4364fd354 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 13 Oct 2021 15:57:43 +0200 Subject: [PATCH 152/440] marvell: octeontx2: build error: unknown type name 'u64' Building an allmodconfig kernel arm64 kernel, the following build error shows up: In file included from drivers/crypto/marvell/octeontx2/cn10k_cpt.c:4: include/linux/soc/marvell/octeontx2/asm.h:38:15: error: unknown type name 'u64' 38 | static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr) | ^~~ Include linux/types.h in asm.h so the compiler knows what the type 'u64' are. Fixes: af3826db74d1 ("octeontx2-pf: Use hardware register for CQE count") Signed-off-by: Anders Roxell Link: https://lore.kernel.org/r/20211013135743.3826594-1-anders.roxell@linaro.org Signed-off-by: Jakub Kicinski --- include/linux/soc/marvell/octeontx2/asm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index 0f79fd7f81a1..d683251a0b40 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -5,6 +5,7 @@ #ifndef __SOC_OTX2_ASM_H #define __SOC_OTX2_ASM_H +#include #if defined(CONFIG_ARM64) /* * otx2_lmt_flush is used for LMT store operation. From 39e222bfd7f37e7a98069869375b903d7096c113 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 15:37:35 +0300 Subject: [PATCH 153/440] net: dsa: unregister cross-chip notifier after ds->ops->teardown To be symmetric with the error unwind path of dsa_switch_setup(), call dsa_switch_unregister_notifier() after ds->ops->teardown. The implication is that ds->ops->teardown cannot emit cross-chip notifiers. For example, currently the dsa_tag_8021q_unregister() call from sja1105_teardown() does not propagate to the entire tree due to this reason. However I cannot find an actual issue caused by this, observed using code inspection. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211012123735.2545742-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8ca6a1170c9d..42d9fcd3c31b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -924,11 +924,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->slave_mii_bus = NULL; } - dsa_switch_unregister_notifier(ds); - if (ds->ops->teardown) ds->ops->teardown(ds); + dsa_switch_unregister_notifier(ds); + if (ds->devlink) { list_for_each_entry(dp, &ds->dst->ports, list) if (dp->ds == ds) From e79d82643a69df03d59fe47e32619515a83e4969 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:04:56 +0300 Subject: [PATCH 154/440] net: enetc: fix check for allocation failure This was supposed to be a check for if dma_alloc_coherent() failed but it has a copy and paste bug so it will not work. Fixes: fb8629e2cbfc ("net: enetc: add support for software TSO") Signed-off-by: Dan Carpenter Reviewed-by: Ioana Ciornei Link: https://lore.kernel.org/r/20211013080456.GC6010@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3ae4f49a9055..8e31fe15bf3c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1767,8 +1767,10 @@ static int enetc_alloc_txbdr(struct enetc_bdr *txr) txr->bd_count * TSO_HEADER_SIZE, &txr->tso_headers_dma, GFP_KERNEL); - if (err) + if (!txr->tso_headers) { + err = -ENOMEM; goto err_alloc_tso; + } txr->next_to_clean = 0; txr->next_to_use = 0; From b063e0651ced6faa72b422d1b751c89bd2878636 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 13 Oct 2021 13:37:44 +0300 Subject: [PATCH 155/440] mlxsw: reg: Fix a typo in a group heading There is no such thing as "traffic group". The group that this is a heading of is "per traffic class counters". Fix the heading. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ed6c3356e4eb..62b2df8a0715 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5371,7 +5371,7 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); -/* Ethernet Per Traffic Group Counters */ +/* Ethernet Per Traffic Class Counters */ /* reg_ppcnt_tc_transmit_queue * Contains the transmit queue depth in cells of traffic class From fc372cc072861e0027a950d896fc5f6912bb0acd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 13 Oct 2021 13:37:45 +0300 Subject: [PATCH 156/440] mlxsw: reg: Rename MLXSW_REG_PPCNT_TC_CONG_TC to _CNT The name does not make sense as it is. Clearly there is a typo and the suffix should have been _CNT, like the other enumerators. Fix accordingly. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 62b2df8a0715..2f8c0c6d66e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4951,7 +4951,7 @@ enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_DISCARD_CNT = 0x6, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, MLXSW_REG_PPCNT_TC_CNT = 0x11, - MLXSW_REG_PPCNT_TC_CONG_TC = 0x13, + MLXSW_REG_PPCNT_TC_CONG_CNT = 0x13, }; /* reg_ppcnt_grp diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3c9844f2aa1d..67529e9537a6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -824,7 +824,7 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev, for (i = 0; i < TC_MAX_QUEUE; i++) { err = mlxsw_sp_port_get_stats_raw(dev, - MLXSW_REG_PPCNT_TC_CONG_TC, + MLXSW_REG_PPCNT_TC_CONG_CNT, i, ppcnt_pl); if (!err) xstats->wred_drop[i] = From 6242b0a96302a0d5117ef47dfc4a335d709e9c57 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 13 Oct 2021 13:37:46 +0300 Subject: [PATCH 157/440] mlxsw: reg: Add ecn_marked_tc to Per-TC Congestion Counters The PPCNT register retrieves per port performance counters. The ecn_marked_tc field in per-TC Congestion counter group contains a count of packets marked as ECN or potentially marked as ECN. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 2f8c0c6d66e0..48b817ba6d4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5398,6 +5398,12 @@ MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, MLXSW_ITEM64(reg, ppcnt, wred_discard, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); +/* reg_ppcnt_ecn_marked_tc + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked_tc, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, u8 prio_tc) From 15be36b8126b8f396cceb6fe7a87a1b911575abb Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 13 Oct 2021 13:37:47 +0300 Subject: [PATCH 158/440] mlxsw: spectrum_qdisc: Introduce per-TC ECN counters The Qdisc code in mlxsw used to report a number of packets ECN-marked on a port. Because reporting a per-port value as a per-TC value was misleading, this was removed in commit 8a29581eb001 ("mlxsw: spectrum: Move the ECN-marked packet counter to ethtool"). On Spectrum-3, a per-TC number of ECN-marked packets is available in per-TC congestion counter group. Add a new array for the ECN counter, fetch the values from the per-TC congestion group, and pick the value indicated by tclass_num as appropriate. On Spectrum-1 and Spectrum-2, this per-TC value is not available, and zeroes will be reported, as they currently are. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 +++++++--- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 9 +++++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 67529e9537a6..d05850ff3a77 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -826,10 +826,14 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev, err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CONG_CNT, i, ppcnt_pl); - if (!err) - xstats->wred_drop[i] = - mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + if (err) + goto tc_cnt; + xstats->wred_drop[i] = + mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + xstats->tc_ecn[i] = mlxsw_reg_ppcnt_ecn_marked_tc_get(ppcnt_pl); + +tc_cnt: err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT, i, ppcnt_pl); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index aae1aed5345b..3ab57e98cad2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -285,6 +285,7 @@ struct mlxsw_sp_port_vlan { /* No need an internal lock; At worse - miss a single periodic iteration */ struct mlxsw_sp_port_xstats { u64 ecn; + u64 tc_ecn[TC_MAX_QUEUE]; u64 wred_drop[TC_MAX_QUEUE]; u64 tail_drop[TC_MAX_QUEUE]; u64 backlog[TC_MAX_QUEUE]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index e367c63e72bf..6d1431fa31d7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -519,6 +519,7 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->prio_bitmap, &stats_base->tx_packets, &stats_base->tx_bytes); + red_base->prob_mark = xstats->tc_ecn[tclass_num]; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); @@ -618,19 +619,22 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; - int early_drops, pdrops; + int early_drops, marks, pdrops; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + marks = xstats->tc_ecn[tclass_num] - xstats_base->prob_mark; pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - xstats_base->pdrop; res->pdrop += pdrops; res->prob_drop += early_drops; + res->prob_mark += marks; xstats_base->pdrop += pdrops; xstats_base->prob_drop += early_drops; + xstats_base->prob_mark += marks; return 0; } @@ -648,7 +652,8 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base = &mlxsw_sp_qdisc->stats_base; mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); - overlimits = xstats->wred_drop[tclass_num] - stats_base->overlimits; + overlimits = xstats->wred_drop[tclass_num] + + xstats->tc_ecn[tclass_num] - stats_base->overlimits; stats_ptr->qstats->overlimits += overlimits; stats_base->overlimits += overlimits; From bf862732945cbfebc8616ef6ca60a879f1445fe1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 13 Oct 2021 13:37:48 +0300 Subject: [PATCH 159/440] selftests: mlxsw: RED: Test per-TC ECN counters Add a variant of ECN test that uses qdisc marked counter (supported on Spectrum-3 and above) instead of the aggregate ethtool ecn_marked counter. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/sch_red_core.sh | 51 +++++++++++++++---- .../drivers/net/mlxsw/sch_red_ets.sh | 11 ++++ .../drivers/net/mlxsw/sch_red_root.sh | 8 +++ 3 files changed, 60 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index eea3e5ad3f38..dd90cd87d4f9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -331,6 +331,14 @@ get_nmarked() ethtool_stats_get $swp3 ecn_marked } +get_qdisc_nmarked() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked +} + get_qdisc_npackets() { local vlan=$1; shift @@ -384,14 +392,15 @@ build_backlog() check_marking() { + local get_nmarked=$1; shift local vlan=$1; shift local cond=$1; shift local npackets_0=$(get_qdisc_npackets $vlan) - local nmarked_0=$(get_nmarked $vlan) + local nmarked_0=$($get_nmarked $vlan) sleep 5 local npackets_1=$(get_qdisc_npackets $vlan) - local nmarked_1=$(get_nmarked $vlan) + local nmarked_1=$($get_nmarked $vlan) local nmarked_d=$((nmarked_1 - nmarked_0)) local npackets_d=$((npackets_1 - npackets_0)) @@ -404,6 +413,7 @@ check_marking() ecn_test_common() { local name=$1; shift + local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift local backlog @@ -416,7 +426,7 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking "$get_nmarked" $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): $name backlog < limit" @@ -426,22 +436,23 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan ">= 95") + pct=$(check_marking "$get_nmarked" $vlan ">= 95") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." log_test "TC $((vlan - 10)): $name backlog > limit" } -do_ecn_test() +__do_ecn_test() { + local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift - local name=ECN + local name=${1-ECN}; shift start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" $vlan $limit + ecn_test_common "$name" "$get_nmarked" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, it should all get dropped, and backlog @@ -455,6 +466,26 @@ do_ecn_test() sleep 1 } +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + + __do_ecn_test get_nmarked "$vlan" "$limit" +} + +do_ecn_test_perband() +{ + local vlan=$1; shift + local limit=$1; shift + + # Per-band ECN counters are not supported on Spectrum-1 and Spectrum-2. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" || + "$DEVLINK_VIDDID" == "15b3:cf6c" ]] && return + + __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" +} + do_ecn_nodrop_test() { local vlan=$1; shift @@ -465,7 +496,7 @@ do_ecn_nodrop_test() $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" $vlan $limit + ecn_test_common "$name" get_nmarked $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, in nodrop mode, make sure it goes to @@ -495,7 +526,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): RED backlog < limit" @@ -503,7 +534,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_fail $? "Traffic went into backlog instead of being early-dropped" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index b58b4cf9dc13..1e5ad3209436 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_test_perband ecn_nodrop_test red_test mc_backlog_test @@ -86,6 +87,16 @@ ecn_test() uninstall_qdisc } +ecn_test_perband() +{ + install_qdisc ecn + + do_ecn_test_perband 10 $BACKLOG1 + do_ecn_test_perband 11 $BACKLOG2 + + uninstall_qdisc +} + ecn_nodrop_test() { install_qdisc ecn nodrop diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index ede9c38d3eff..d79a82f317d2 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_test_perband ecn_nodrop_test red_test mc_backlog_test @@ -35,6 +36,13 @@ ecn_test() uninstall_qdisc } +ecn_test_perband() +{ + install_qdisc ecn + do_ecn_test_perband 10 $BACKLOG + uninstall_qdisc +} + ecn_nodrop_test() { install_qdisc ecn nodrop From 9974cb5c879048f5144d0660c4932d98176213c4 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Wed, 13 Oct 2021 17:47:02 +0800 Subject: [PATCH 160/440] net: delete redundant function declaration The implement of function netdev_all_upper_get_next_dev_rcu has been removed in: commit f1170fd462c6 ("net: Remove all_adj_list and its references") so delete redundant declaration in header file. Fixes: f1170fd462c6 ("net: Remove all_adj_list and its references") Signed-off-by: Chen Wandun Link: https://lore.kernel.org/r/20211013094702.3931071-1-chenwandun@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f33af341bfb2..173984414f38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4801,8 +4801,6 @@ struct netdev_nested_priv { bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter); #ifdef CONFIG_LOCKDEP static LIST_HEAD(net_unlink_list); From e00ae1a2aaf2348025cc75007ebd932a1e64c544 Mon Sep 17 00:00:00 2001 From: Maciej Machnikowski Date: Mon, 30 Aug 2021 14:26:14 +0200 Subject: [PATCH 161/440] ice: Refactor ice_aqc_link_topo_addr Separate link topo parameters in struct ice_aqc_link_topo_addr into new struct ice_aqc_link_topo_params. This keeps input parameters for the get_link_topo command in a separate structure and is required by future commands that operate only on link topo params without the node handle. Signed-off-by: Maciej Machnikowski Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 6 +++++- drivers/net/ethernet/intel/ice/ice_common.c | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 2b4437dc112f..9f6edfa59770 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1279,7 +1279,7 @@ struct ice_aqc_set_mac_lb { u8 reserved[15]; }; -struct ice_aqc_link_topo_addr { +struct ice_aqc_link_topo_params { u8 lport_num; u8 lport_num_valid; #define ICE_AQC_LINK_TOPO_PORT_NUM_VALID BIT(0) @@ -1305,6 +1305,10 @@ struct ice_aqc_link_topo_addr { #define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED 4 #define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE 5 u8 index; +}; + +struct ice_aqc_link_topo_addr { + struct ice_aqc_link_topo_params topo_params; __le16 handle; #define ICE_AQC_LINK_TOPO_HANDLE_S 0 #define ICE_AQC_LINK_TOPO_HANDLE_M (0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 152a1405e353..b0084359a7e1 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -240,11 +240,13 @@ ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - cmd->addr.node_type_ctx = (ICE_AQC_LINK_TOPO_NODE_CTX_PORT << - ICE_AQC_LINK_TOPO_NODE_CTX_S); + cmd->addr.topo_params.node_type_ctx = + (ICE_AQC_LINK_TOPO_NODE_CTX_PORT << + ICE_AQC_LINK_TOPO_NODE_CTX_S); /* set node type */ - cmd->addr.node_type_ctx |= (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type); + cmd->addr.topo_params.node_type_ctx |= + (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type); return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); } From 3bb6324b3dcbf30a99fea0bad9fe8941de4b75fb Mon Sep 17 00:00:00 2001 From: Maciej Machnikowski Date: Tue, 17 Aug 2021 13:09:16 +0200 Subject: [PATCH 162/440] ice: Implement functions for reading and setting GPIO pins Implement ice_aq_get_gpio and ice_aq_set_gpio for reading and changing the state of GPIO pins described in the topology. Signed-off-by: Maciej Machnikowski Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 13 +++++ drivers/net/ethernet/intel/ice/ice_common.c | 58 +++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 6 ++ 3 files changed, 77 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 9f6edfa59770..25708c320f5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1344,6 +1344,16 @@ struct ice_aqc_set_port_id_led { u8 rsvd[13]; }; +/* Set/Get GPIO (direct, 0x06EC/0x06ED) */ +struct ice_aqc_gpio { + __le16 gpio_ctrl_handle; +#define ICE_AQC_GPIO_HANDLE_S 0 +#define ICE_AQC_GPIO_HANDLE_M (0x3FF << ICE_AQC_GPIO_HANDLE_S) + u8 gpio_num; + u8 gpio_val; + u8 rsvd[12]; +}; + /* Read/Write SFF EEPROM command (indirect 0x06EE) */ struct ice_aqc_sff_eeprom { u8 lport_num; @@ -1985,6 +1995,7 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_gpio read_write_gpio; struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; @@ -2140,6 +2151,8 @@ enum ice_adminq_opc { ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_get_link_topo = 0x06E0, ice_aqc_opc_set_port_id_led = 0x06E9, + ice_aqc_opc_set_gpio = 0x06EC, + ice_aqc_opc_get_gpio = 0x06ED, ice_aqc_opc_sff_eeprom = 0x06EE, /* NVM commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index b0084359a7e1..7416f802a558 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4794,6 +4794,64 @@ ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, return 0; } +/** + * ice_aq_set_gpio + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: SW provide IO value to set in the LSB + * @cd: pointer to command details structure or NULL + * + * Sends 0x06EC AQ command to set the GPIO pin state that's part of the topology + */ +int +ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value, + struct ice_sq_cd *cd) +{ + struct ice_aqc_gpio *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_gpio); + cmd = &desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + cmd->gpio_val = value ? 1 : 0; + + return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, NULL, 0, cd)); +} + +/** + * ice_aq_get_gpio + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: IO value read + * @cd: pointer to command details structure or NULL + * + * Sends 0x06ED AQ command to get the value of a GPIO signal which is part of + * the topology + */ +int +ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value, struct ice_sq_cd *cd) +{ + struct ice_aqc_gpio *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_gpio); + cmd = &desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (status) + return ice_status_to_errno(status); + + *value = !!cmd->gpio_val; + return 0; +} + /** * ice_fw_supports_link_override * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index fb16070f02e2..4273f8921e53 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -192,6 +192,12 @@ ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, int ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, u32 *value, struct ice_sq_cd *cd); +int +ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value, + struct ice_sq_cd *cd); +int +ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value, struct ice_sq_cd *cd); enum ice_status ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); From 885fe6932a1190cc0a949c3da52223c54f6fe7a8 Mon Sep 17 00:00:00 2001 From: Maciej Machnikowski Date: Tue, 17 Aug 2021 13:09:17 +0200 Subject: [PATCH 163/440] ice: Add support for SMA control multiplexer E810-T adapters have two external bidirectional SMA connectors and two internal unidirectional U.FL connectors. Multiplexing between U.FL and SMA and SMA direction is controlled using the PCA9575 expander. Add support for the PCA9575 detection and control of the respective pins of the SMA/U.FL multiplexer using the GPIO AQ API. Signed-off-by: Maciej Machnikowski Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 1 + drivers/net/ethernet/intel/ice/ice_common.c | 21 +++ drivers/net/ethernet/intel/ice/ice_common.h | 1 + drivers/net/ethernet/intel/ice/ice_devids.h | 2 + drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 151 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 22 +++ drivers/net/ethernet/intel/ice/ice_type.h | 1 + 7 files changed, 199 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 25708c320f5d..a5425f0dce3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1331,6 +1331,7 @@ struct ice_aqc_link_topo_addr { struct ice_aqc_get_link_topo { struct ice_aqc_link_topo_addr addr; u8 node_part_num; +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 u8 rsvd[9]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 7416f802a558..16a25616cdc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -69,6 +69,27 @@ bool ice_is_e810(struct ice_hw *hw) return hw->mac_type == ICE_MAC_E810; } +/** + * ice_is_e810t + * @hw: pointer to the hardware structure + * + * returns true if the device is E810T based, false if not. + */ +bool ice_is_e810t(struct ice_hw *hw) +{ + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + if (hw->subsystem_device_id == ICE_SUBDEV_ID_E810T || + hw->subsystem_device_id == ICE_SUBDEV_ID_E810T2) + return true; + break; + default: + break; + } + + return false; +} + /** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 4273f8921e53..65c1b3244264 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -183,6 +183,7 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); +bool ice_is_e810t(struct ice_hw *hw); enum ice_status ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 9d8194671f6a..8d2c39ee775b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -21,6 +21,8 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +#define ICE_SUBDEV_ID_E810T 0x000E +#define ICE_SUBDEV_ID_E810T2 0x000F /* Intel(R) Ethernet Controller E810-XXV for SFP */ #define ICE_DEV_ID_E810_XXV_SFP 0x159B /* Intel(R) Ethernet Connection E823-C for backplane */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 3eca0e4eab0b..29f947c0cd2e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -649,3 +649,154 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { return ice_clear_phy_tstamp_e810(hw, block, idx); } + +/* E810T SMA functions + * + * The following functions operate specifically on E810T hardware and are used + * to access the extended GPIOs available. + */ + +/** + * ice_get_pca9575_handle + * @hw: pointer to the hw struct + * @pca9575_handle: GPIO controller's handle + * + * Find and return the GPIO controller's handle in the netlist. + * When found - the value will be cached in the hw structure and following calls + * will return cached value + */ +static int +ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) +{ + struct ice_aqc_get_link_topo *cmd; + struct ice_aq_desc desc; + int status; + u8 idx; + + /* If handle was read previously return cached value */ + if (hw->io_expander_handle) { + *pca9575_handle = hw->io_expander_handle; + return 0; + } + + /* If handle was not detected read it from the netlist */ + cmd = &desc.params.get_link_topo; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + + /* Set node type to GPIO controller */ + cmd->addr.topo_params.node_type_ctx = + (ICE_AQC_LINK_TOPO_NODE_TYPE_M & + ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL); + +#define SW_PCA9575_SFP_TOPO_IDX 2 +#define SW_PCA9575_QSFP_TOPO_IDX 1 + + /* Check if the SW IO expander controlling SMA exists in the netlist. */ + if (hw->device_id == ICE_DEV_ID_E810C_SFP) + idx = SW_PCA9575_SFP_TOPO_IDX; + else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) + idx = SW_PCA9575_QSFP_TOPO_IDX; + else + return -EOPNOTSUPP; + + cmd->addr.topo_params.index = idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (status) + return -EOPNOTSUPP; + + /* Verify if we found the right IO expander type */ + if (desc.params.get_link_topo.node_part_num != + ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) + return -EOPNOTSUPP; + + /* If present save the handle and return it */ + hw->io_expander_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + *pca9575_handle = hw->io_expander_handle; + + return 0; +} + +/** + * ice_read_sma_ctrl_e810t + * @hw: pointer to the hw struct + * @data: pointer to data to be read from the GPIO controller + * + * Read the SMA controller state. It is connected to pins 3-7 of Port 1 of the + * PCA9575 expander, so only bits 3-7 in data are valid. + */ +int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data) +{ + int status; + u16 handle; + u8 i; + + status = ice_get_pca9575_handle(hw, &handle); + if (status) + return status; + + *data = 0; + + for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) { + bool pin; + + status = ice_aq_get_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET, + &pin, NULL); + if (status) + break; + *data |= (u8)(!pin) << i; + } + + return status; +} + +/** + * ice_write_sma_ctrl_e810t + * @hw: pointer to the hw struct + * @data: data to be written to the GPIO controller + * + * Write the data to the SMA controller. It is connected to pins 3-7 of Port 1 + * of the PCA9575 expander, so only bits 3-7 in data are valid. + */ +int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data) +{ + int status; + u16 handle; + u8 i; + + status = ice_get_pca9575_handle(hw, &handle); + if (status) + return status; + + for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) { + bool pin; + + pin = !(data & (1 << i)); + status = ice_aq_set_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET, + pin, NULL); + if (status) + break; + } + + return status; +} + +/** + * ice_is_pca9575_present + * @hw: pointer to the hw struct + * + * Check if the SW IO expander is present in the netlist + */ +bool ice_is_pca9575_present(struct ice_hw *hw) +{ + u16 handle = 0; + int status; + + if (!ice_is_e810t(hw)) + return false; + + status = ice_get_pca9575_handle(hw, &handle); + + return !status && handle; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 55a414e87018..b2984b5c22c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -30,6 +30,9 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); +int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data); +int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data); +bool ice_is_pca9575_present(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 @@ -76,4 +79,23 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw); #define LOW_TX_MEMORY_BANK_START 0x03090000 #define HIGH_TX_MEMORY_BANK_START 0x03090004 +/* E810T SMA controller pin control */ +#define ICE_SMA1_DIR_EN_E810T BIT(4) +#define ICE_SMA1_TX_EN_E810T BIT(5) +#define ICE_SMA2_UFL2_RX_DIS_E810T BIT(3) +#define ICE_SMA2_DIR_EN_E810T BIT(6) +#define ICE_SMA2_TX_EN_E810T BIT(7) + +#define ICE_SMA1_MASK_E810T (ICE_SMA1_DIR_EN_E810T | \ + ICE_SMA1_TX_EN_E810T) +#define ICE_SMA2_MASK_E810T (ICE_SMA2_UFL2_RX_DIS_E810T | \ + ICE_SMA2_DIR_EN_E810T | \ + ICE_SMA2_TX_EN_E810T) +#define ICE_ALL_SMA_MASK_E810T (ICE_SMA1_MASK_E810T | \ + ICE_SMA2_MASK_E810T) + +#define ICE_SMA_MIN_BIT_E810T 3 +#define ICE_SMA_MAX_BIT_E810T 7 +#define ICE_PCA9575_P1_OFFSET 8 + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index d22ac1d430d0..d5cb1c5a89c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -916,6 +916,7 @@ struct ice_hw { struct mutex rss_locks; /* protect RSS configuration */ struct list_head rss_list_head; struct ice_mbx_snapshot mbx_snapshot; + u16 io_expander_handle; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ From 325b2064d00a88d10c511c96adea4e8274778ee1 Mon Sep 17 00:00:00 2001 From: Maciej Machnikowski Date: Tue, 17 Aug 2021 13:09:18 +0200 Subject: [PATCH 164/440] ice: Implement support for SMA and U.FL on E810-T Expose SMA and U.FL connectors as ptp_pins on E810-T based adapters and allow controlling them. E810-T adapters are equipped with: - 2 external bidirectional SMA connectors - 1 internal TX U.FL - 1 internal RX U.FL U.FL connectors share signal lines with the SMA connectors. The TX U.FL1 share the line with the SMA1 and the RX U.FL2 share line with the SMA2. This dependence is controlled by the ice_verify_pin_e810t. Additionally add support for the E810-T-based devices which don't use the SMA/U.FL controller. If the IO expander is not detected don't expose pins and use 2 predefined 1PPS input and output pins. Signed-off-by: Maciej Machnikowski Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_lib.c | 15 + drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_ptp.c | 370 ++++++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_ptp.h | 20 +- 5 files changed, 397 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 6bfbf704eddf..30cc748337dd 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -163,6 +163,7 @@ enum ice_feature { ICE_F_DSCP, + ICE_F_SMA_CTRL, ICE_F_MAX }; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 93565f597266..c8a50898bbc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3649,6 +3649,19 @@ static void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f) set_bit(f, pf->features); } +/** + * ice_clear_feature_support + * @pf: pointer to the struct ice_pf instance + * @f: feature enum to clear + */ +void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f) +{ + if (f < 0 || f >= ICE_F_MAX) + return; + + clear_bit(f, pf->features); +} + /** * ice_init_feature_support * @pf: pointer to the struct ice_pf instance @@ -3662,6 +3675,8 @@ void ice_init_feature_support(struct ice_pf *pf) case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: ice_set_feature_support(pf, ICE_F_DSCP); + if (ice_is_e810t(&pf->hw)) + ice_set_feature_support(pf, ICE_F_SMA_CTRL); break; default: break; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 3f3fef6551c0..193f96305407 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -129,5 +129,6 @@ void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f); +void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f); void ice_init_feature_support(struct ice_pf *pf); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 05cc5870e4ef..77298f6cef2f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -6,6 +6,252 @@ #define E810_OUT_PROP_DELAY_NS 1 +static const struct ptp_pin_desc ice_pin_desc_e810t[] = { + /* name idx func chan */ + { "GNSS", GNSS, PTP_PF_EXTTS, 0, { 0, } }, + { "SMA1", SMA1, PTP_PF_NONE, 1, { 0, } }, + { "U.FL1", UFL1, PTP_PF_NONE, 1, { 0, } }, + { "SMA2", SMA2, PTP_PF_NONE, 2, { 0, } }, + { "U.FL2", UFL2, PTP_PF_NONE, 2, { 0, } }, +}; + +/** + * ice_get_sma_config_e810t + * @hw: pointer to the hw struct + * @ptp_pins: pointer to the ptp_pin_desc struture + * + * Read the configuration of the SMA control logic and put it into the + * ptp_pin_desc structure + */ +static int +ice_get_sma_config_e810t(struct ice_hw *hw, struct ptp_pin_desc *ptp_pins) +{ + u8 data, i; + int status; + + /* Read initial pin state */ + status = ice_read_sma_ctrl_e810t(hw, &data); + if (status) + return status; + + /* initialize with defaults */ + for (i = 0; i < NUM_PTP_PINS_E810T; i++) { + snprintf(ptp_pins[i].name, sizeof(ptp_pins[i].name), + "%s", ice_pin_desc_e810t[i].name); + ptp_pins[i].index = ice_pin_desc_e810t[i].index; + ptp_pins[i].func = ice_pin_desc_e810t[i].func; + ptp_pins[i].chan = ice_pin_desc_e810t[i].chan; + } + + /* Parse SMA1/UFL1 */ + switch (data & ICE_SMA1_MASK_E810T) { + case ICE_SMA1_MASK_E810T: + default: + ptp_pins[SMA1].func = PTP_PF_NONE; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case ICE_SMA1_DIR_EN_E810T: + ptp_pins[SMA1].func = PTP_PF_PEROUT; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case ICE_SMA1_TX_EN_E810T: + ptp_pins[SMA1].func = PTP_PF_EXTTS; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case 0: + ptp_pins[SMA1].func = PTP_PF_EXTTS; + ptp_pins[UFL1].func = PTP_PF_PEROUT; + break; + } + + /* Parse SMA2/UFL2 */ + switch (data & ICE_SMA2_MASK_E810T) { + case ICE_SMA2_MASK_E810T: + default: + ptp_pins[SMA2].func = PTP_PF_NONE; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; + case (ICE_SMA2_TX_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T): + ptp_pins[SMA2].func = PTP_PF_EXTTS; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; + case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T): + ptp_pins[SMA2].func = PTP_PF_PEROUT; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; + case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T): + ptp_pins[SMA2].func = PTP_PF_NONE; + ptp_pins[UFL2].func = PTP_PF_EXTTS; + break; + case ICE_SMA2_DIR_EN_E810T: + ptp_pins[SMA2].func = PTP_PF_PEROUT; + ptp_pins[UFL2].func = PTP_PF_EXTTS; + break; + } + + return 0; +} + +/** + * ice_ptp_set_sma_config_e810t + * @hw: pointer to the hw struct + * @ptp_pins: pointer to the ptp_pin_desc struture + * + * Set the configuration of the SMA control logic based on the configuration in + * num_pins parameter + */ +static int +ice_ptp_set_sma_config_e810t(struct ice_hw *hw, + const struct ptp_pin_desc *ptp_pins) +{ + int status; + u8 data; + + /* SMA1 and UFL1 cannot be set to TX at the same time */ + if (ptp_pins[SMA1].func == PTP_PF_PEROUT && + ptp_pins[UFL1].func == PTP_PF_PEROUT) + return -EINVAL; + + /* SMA2 and UFL2 cannot be set to RX at the same time */ + if (ptp_pins[SMA2].func == PTP_PF_EXTTS && + ptp_pins[UFL2].func == PTP_PF_EXTTS) + return -EINVAL; + + /* Read initial pin state value */ + status = ice_read_sma_ctrl_e810t(hw, &data); + if (status) + return status; + + /* Set the right sate based on the desired configuration */ + data &= ~ICE_SMA1_MASK_E810T; + if (ptp_pins[SMA1].func == PTP_PF_NONE && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 + U.FL1 disabled"); + data |= ICE_SMA1_MASK_E810T; + } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 RX"); + data |= ICE_SMA1_TX_EN_E810T; + } else if (ptp_pins[SMA1].func == PTP_PF_NONE && + ptp_pins[UFL1].func == PTP_PF_PEROUT) { + /* U.FL 1 TX will always enable SMA 1 RX */ + dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX"); + } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS && + ptp_pins[UFL1].func == PTP_PF_PEROUT) { + dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX"); + } else if (ptp_pins[SMA1].func == PTP_PF_PEROUT && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 TX"); + data |= ICE_SMA1_DIR_EN_E810T; + } + + data &= ~ICE_SMA2_MASK_E810T; + if (ptp_pins[SMA2].func == PTP_PF_NONE && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 + U.FL2 disabled"); + data |= ICE_SMA2_MASK_E810T; + } else if (ptp_pins[SMA2].func == PTP_PF_EXTTS && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 RX"); + data |= (ICE_SMA2_TX_EN_E810T | + ICE_SMA2_UFL2_RX_DIS_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_NONE && + ptp_pins[UFL2].func == PTP_PF_EXTTS) { + dev_info(ice_hw_to_dev(hw), "UFL2 RX"); + data |= (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 TX"); + data |= (ICE_SMA2_DIR_EN_E810T | + ICE_SMA2_UFL2_RX_DIS_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT && + ptp_pins[UFL2].func == PTP_PF_EXTTS) { + dev_info(ice_hw_to_dev(hw), "SMA2 TX + U.FL2 RX"); + data |= ICE_SMA2_DIR_EN_E810T; + } + + return ice_write_sma_ctrl_e810t(hw, data); +} + +/** + * ice_ptp_set_sma_e810t + * @info: the driver's PTP info structure + * @pin: pin index in kernel structure + * @func: Pin function to be set (PTP_PF_NONE, PTP_PF_EXTTS or PTP_PF_PEROUT) + * + * Set the configuration of a single SMA pin + */ +static int +ice_ptp_set_sma_e810t(struct ptp_clock_info *info, unsigned int pin, + enum ptp_pin_function func) +{ + struct ptp_pin_desc ptp_pins[NUM_PTP_PINS_E810T]; + struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_hw *hw = &pf->hw; + int err; + + if (pin < SMA1 || func > PTP_PF_PEROUT) + return -EOPNOTSUPP; + + err = ice_get_sma_config_e810t(hw, ptp_pins); + if (err) + return err; + + /* Disable the same function on the other pin sharing the channel */ + if (pin == SMA1 && ptp_pins[UFL1].func == func) + ptp_pins[UFL1].func = PTP_PF_NONE; + if (pin == UFL1 && ptp_pins[SMA1].func == func) + ptp_pins[SMA1].func = PTP_PF_NONE; + + if (pin == SMA2 && ptp_pins[UFL2].func == func) + ptp_pins[UFL2].func = PTP_PF_NONE; + if (pin == UFL2 && ptp_pins[SMA2].func == func) + ptp_pins[SMA2].func = PTP_PF_NONE; + + /* Set up new pin function in the temp table */ + ptp_pins[pin].func = func; + + return ice_ptp_set_sma_config_e810t(hw, ptp_pins); +} + +/** + * ice_verify_pin_e810t + * @info: the driver's PTP info structure + * @pin: Pin index + * @func: Assigned function + * @chan: Assigned channel + * + * Verify if pin supports requested pin function. If the Check pins consistency. + * Reconfigure the SMA logic attached to the given pin to enable its + * desired functionality + */ +static int +ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + /* Don't allow channel reassignment */ + if (chan != ice_pin_desc_e810t[pin].chan) + return -EOPNOTSUPP; + + /* Check if functions are properly assigned */ + switch (func) { + case PTP_PF_NONE: + break; + case PTP_PF_EXTTS: + if (pin == UFL1) + return -EOPNOTSUPP; + break; + case PTP_PF_PEROUT: + if (pin == UFL2 || pin == GNSS) + return -EOPNOTSUPP; + break; + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + + return ice_ptp_set_sma_e810t(info, pin, func); +} + /** * ice_set_tx_tstamp - Enable or disable Tx timestamping * @pf: The PF pointer to search in @@ -735,17 +981,34 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, { struct ice_pf *pf = ptp_info_to_pf(info); struct ice_perout_channel clk_cfg = {0}; + bool sma_pres = false; unsigned int chan; u32 gpio_pin; int err; + if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) + sma_pres = true; + switch (rq->type) { case PTP_CLK_REQ_PEROUT: chan = rq->perout.index; - if (chan == PPS_CLK_GEN_CHAN) + if (sma_pres) { + if (chan == ice_pin_desc_e810t[SMA1].chan) + clk_cfg.gpio_pin = GPIO_20; + else if (chan == ice_pin_desc_e810t[SMA2].chan) + clk_cfg.gpio_pin = GPIO_22; + else + return -1; + } else if (ice_is_e810t(&pf->hw)) { + if (chan == 0) + clk_cfg.gpio_pin = GPIO_20; + else + clk_cfg.gpio_pin = GPIO_22; + } else if (chan == PPS_CLK_GEN_CHAN) { clk_cfg.gpio_pin = PPS_PIN_INDEX; - else + } else { clk_cfg.gpio_pin = chan; + } clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) + rq->perout.period.nsec); @@ -757,7 +1020,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, break; case PTP_CLK_REQ_EXTTS: chan = rq->extts.index; - gpio_pin = chan; + if (sma_pres) { + if (chan < ice_pin_desc_e810t[SMA2].chan) + gpio_pin = GPIO_21; + else + gpio_pin = GPIO_23; + } else if (ice_is_e810t(&pf->hw)) { + if (chan == 0) + gpio_pin = GPIO_21; + else + gpio_pin = GPIO_23; + } else { + gpio_pin = chan; + } err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin, rq->extts.flags); @@ -1037,14 +1312,94 @@ ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, } } +/** + * ice_ptp_disable_sma_pins_e810t - Disable E810-T SMA pins + * @pf: pointer to the PF structure + * @info: PTP clock info structure + * + * Disable the OS access to the SMA pins. Called to clear out the OS + * indications of pin support when we fail to setup the E810-T SMA control + * register. + */ +static void +ice_ptp_disable_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + struct device *dev = ice_pf_to_dev(pf); + + dev_warn(dev, "Failed to configure E810-T SMA pin control\n"); + + info->enable = NULL; + info->verify = NULL; + info->n_pins = 0; + info->n_ext_ts = 0; + info->n_per_out = 0; +} + +/** + * ice_ptp_setup_sma_pins_e810t - Setup the SMA pins + * @pf: pointer to the PF structure + * @info: PTP clock info structure + * + * Finish setting up the SMA pins by allocating pin_config, and setting it up + * according to the current status of the SMA. On failure, disable all of the + * extended SMA pin support. + */ +static void +ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + + /* Allocate memory for kernel pins interface */ + info->pin_config = devm_kcalloc(dev, info->n_pins, + sizeof(*info->pin_config), GFP_KERNEL); + if (!info->pin_config) { + ice_ptp_disable_sma_pins_e810t(pf, info); + return; + } + + /* Read current SMA status */ + err = ice_get_sma_config_e810t(&pf->hw, info->pin_config); + if (err) + ice_ptp_disable_sma_pins_e810t(pf, info); +} + +/** + * ice_ptp_setup_pins_e810t - Setup PTP pins in sysfs + * @pf: pointer to the PF instance + * @info: PTP clock capabilities + */ +static void +ice_ptp_setup_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + /* Check if SMA controller is in the netlist */ + if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL) && + !ice_is_pca9575_present(&pf->hw)) + ice_clear_feature_support(pf, ICE_F_SMA_CTRL); + + if (!ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) { + info->n_ext_ts = N_EXT_TS_E810_NO_SMA; + info->n_per_out = N_PER_OUT_E810T_NO_SMA; + return; + } + + info->n_per_out = N_PER_OUT_E810T; + info->n_ext_ts = N_EXT_TS_E810; + info->n_pins = NUM_PTP_PINS_E810T; + info->verify = ice_verify_pin_e810t; + + /* Complete setup of the SMA pins */ + ice_ptp_setup_sma_pins_e810t(pf, info); +} + /** * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs * @info: PTP clock capabilities */ static void ice_ptp_setup_pins_e810(struct ptp_clock_info *info) { - info->n_per_out = E810_N_PER_OUT; - info->n_ext_ts = E810_N_EXT_TS; + info->n_per_out = N_PER_OUT_E810; + info->n_ext_ts = N_EXT_TS_E810; } /** @@ -1062,7 +1417,10 @@ ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info) { info->enable = ice_ptp_gpio_enable_e810; - ice_ptp_setup_pins_e810(info); + if (ice_is_e810t(&pf->hw)) + ice_ptp_setup_pins_e810t(pf, info); + else + ice_ptp_setup_pins_e810(info); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index e1c787bd5b96..1b9aab7de319 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -9,12 +9,21 @@ #include "ice_ptp_hw.h" -enum ice_ptp_pin { +enum ice_ptp_pin_e810 { GPIO_20 = 0, GPIO_21, GPIO_22, GPIO_23, - NUM_ICE_PTP_PIN + NUM_PTP_PIN_E810 +}; + +enum ice_ptp_pin_e810t { + GNSS = 0, + SMA1, + UFL1, + SMA2, + UFL2, + NUM_PTP_PINS_E810T }; struct ice_perout_channel { @@ -155,8 +164,11 @@ struct ice_ptp { #define PPS_CLK_SRC_CHAN 2 #define PPS_PIN_INDEX 5 #define TIME_SYNC_PIN_INDEX 4 -#define E810_N_EXT_TS 3 -#define E810_N_PER_OUT 4 +#define N_EXT_TS_E810 3 +#define N_PER_OUT_E810 4 +#define N_PER_OUT_E810T 3 +#define N_PER_OUT_E810T_NO_SMA 2 +#define N_EXT_TS_E810_NO_SMA 2 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) struct ice_pf; From 766607570becbd26cab6d66a544dd8d0d964df5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Oct 2021 07:24:31 -0700 Subject: [PATCH 165/440] ethernet: constify references to netdev->dev_addr in drivers This big patch sprinkles const on local variables and function arguments which may refer to netdev->dev_addr. Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Some of the changes here are not strictly required - const is sometimes cast off but pointer is not used for writing. It seems like it's still better to add the const in case the code changes later or relevant -W flags get enabled for the build. No functional changes. Link: https://lore.kernel.org/r/20211014142432.449314-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/actions/owl-emac.c | 2 +- drivers/net/ethernet/adaptec/starfire.c | 10 +++++----- drivers/net/ethernet/alacritech/slicoss.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 4 ++-- drivers/net/ethernet/altera/altera_tse_main.c | 2 +- drivers/net/ethernet/amd/nmclan_cs.c | 3 ++- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe.h | 2 +- drivers/net/ethernet/apm/xgene-v2/mac.c | 2 +- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 2 +- drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 2 +- drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 2 +- drivers/net/ethernet/apple/bmac.c | 8 ++++---- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 6 +++--- drivers/net/ethernet/aquantia/atlantic/aq_macsec.c | 2 +- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 4 ++-- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 ++-- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h | 2 +- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 4 ++-- .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 4 ++-- .../net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 6 ++++-- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 3 ++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- drivers/net/ethernet/calxeda/xgmac.c | 2 +- drivers/net/ethernet/chelsio/cxgb/gmac.h | 2 +- drivers/net/ethernet/chelsio/cxgb/pm3393.c | 2 +- drivers/net/ethernet/chelsio/cxgb/vsc7326.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/common.h | 2 +- drivers/net/ethernet/chelsio/cxgb3/xgmac.c | 2 +- drivers/net/ethernet/cisco/enic/enic_pp.c | 2 +- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dnet.c | 6 +++--- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/ethoc.c | 2 +- drivers/net/ethernet/fealnx.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 ++-- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 8 ++++---- drivers/net/ethernet/freescale/fman/fman_dtsec.h | 2 +- drivers/net/ethernet/freescale/fman/fman_memac.c | 8 ++++---- drivers/net/ethernet/freescale/fman/fman_memac.h | 2 +- drivers/net/ethernet/freescale/fman/fman_tgec.c | 8 ++++---- drivers/net/ethernet/freescale/fman/fman_tgec.h | 2 +- drivers/net/ethernet/freescale/fman/mac.h | 2 +- drivers/net/ethernet/hisilicon/hisi_femac.c | 2 +- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 5 +++-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- drivers/net/ethernet/i825xx/sun3_82586.c | 2 +- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_hw.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_hw.h | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- drivers/net/ethernet/marvell/pxa168_eth.c | 6 +++--- drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- drivers/net/ethernet/micrel/ks8842.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 4 ++-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 3 ++- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/s2io.h | 2 +- .../net/ethernet/netronome/nfp/flower/tunnel_conf.c | 6 +++--- drivers/net/ethernet/nxp/lpc_eth.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_l2.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_vf.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_vf.h | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_filter.c | 2 +- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 2 +- drivers/net/ethernet/rdc/r6040.c | 12 ++++++------ drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c | 3 ++- drivers/net/ethernet/sfc/ef10.c | 4 ++-- drivers/net/ethernet/sfc/ef10_sriov.c | 2 +- drivers/net/ethernet/sfc/ef10_sriov.h | 6 +++--- drivers/net/ethernet/sfc/net_driver.h | 2 +- drivers/net/ethernet/sfc/siena_sriov.c | 2 +- drivers/net/ethernet/sfc/siena_sriov.h | 2 +- drivers/net/ethernet/sis/sis900.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 2 +- drivers/net/ethernet/stmicro/stmmac/common.h | 4 ++-- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 3 ++- drivers/net/ethernet/stmicro/stmmac/hwif.h | 3 ++- .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 4 ++-- drivers/net/ethernet/sun/sunbmac.c | 2 +- drivers/net/ethernet/sun/sunqe.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac.h | 2 +- drivers/net/ethernet/ti/tlan.c | 4 ++-- drivers/net/ethernet/toshiba/tc35815.c | 3 ++- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 7 ++++--- drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +- drivers/net/phy/mscc/mscc_main.c | 2 +- drivers/net/usb/aqc111.c | 2 +- drivers/net/usb/ax88179_178a.c | 8 ++++---- drivers/net/usb/catc.c | 2 +- drivers/net/usb/dm9601.c | 3 ++- drivers/net/usb/mcs7830.c | 3 ++- drivers/net/usb/sr9700.c | 3 ++- include/linux/qed/qed_eth_if.h | 2 +- include/linux/qed/qed_if.h | 2 +- include/linux/qed/qed_rdma_if.h | 3 ++- 134 files changed, 204 insertions(+), 189 deletions(-) diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index dce93acd1644..1cfdd01b4c2e 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -342,7 +342,7 @@ static u32 owl_emac_dma_cmd_stop(struct owl_emac_priv *priv) static void owl_emac_set_hw_mac_addr(struct net_device *netdev) { struct owl_emac_priv *priv = netdev_priv(netdev); - u8 *mac_addr = netdev->dev_addr; + const u8 *mac_addr = netdev->dev_addr; u32 addr_high, addr_low; addr_high = mac_addr[0] << 8 | mac_addr[1]; diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index e0f6cc910bd2..16b6b83f670b 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -955,7 +955,7 @@ static int netdev_open(struct net_device *dev) writew(0, ioaddr + PerfFilterTable + 4); writew(0, ioaddr + PerfFilterTable + 8); for (i = 1; i < 16; i++) { - __be16 *eaddrs = (__be16 *)dev->dev_addr; + const __be16 *eaddrs = (const __be16 *)dev->dev_addr; void __iomem *setup_frm = ioaddr + PerfFilterTable + i * 16; writew(be16_to_cpu(eaddrs[2]), setup_frm); setup_frm += 4; writew(be16_to_cpu(eaddrs[1]), setup_frm); setup_frm += 4; @@ -1787,14 +1787,14 @@ static void set_rx_mode(struct net_device *dev) } else if (netdev_mc_count(dev) <= 14) { /* Use the 16 element perfect filter, skip first two entries. */ void __iomem *filter_addr = ioaddr + PerfFilterTable + 2 * 16; - __be16 *eaddrs; + const __be16 *eaddrs; netdev_for_each_mc_addr(ha, dev) { eaddrs = (__be16 *) ha->addr; writew(be16_to_cpu(eaddrs[2]), filter_addr); filter_addr += 4; writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4; writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 8; } - eaddrs = (__be16 *)dev->dev_addr; + eaddrs = (const __be16 *)dev->dev_addr; i = netdev_mc_count(dev) + 2; while (i++ < 16) { writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 4; @@ -1805,7 +1805,7 @@ static void set_rx_mode(struct net_device *dev) } else { /* Must use a multicast hash table. */ void __iomem *filter_addr; - __be16 *eaddrs; + const __be16 *eaddrs; __le16 mc_filter[32] __attribute__ ((aligned(sizeof(long)))); /* Multicast hash filter */ memset(mc_filter, 0, sizeof(mc_filter)); @@ -1819,7 +1819,7 @@ static void set_rx_mode(struct net_device *dev) } /* Clear the perfect filter list, skip first two entries. */ filter_addr = ioaddr + PerfFilterTable + 2 * 16; - eaddrs = (__be16 *)dev->dev_addr; + eaddrs = (const __be16 *)dev->dev_addr; for (i = 2; i < 16; i++) { writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 4; writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4; diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 82f4f2608102..1fc9a1cd3ef8 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -1008,7 +1008,7 @@ static void slic_set_link_autoneg(struct slic_device *sdev) static void slic_set_mac_address(struct slic_device *sdev) { - u8 *addr = sdev->netdev->dev_addr; + const u8 *addr = sdev->netdev->dev_addr; u32 val; val = addr[5] | addr[4] << 8 | addr[3] << 16 | addr[2] << 24; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 7aaef593b031..eeb86bd851f9 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2712,7 +2712,7 @@ static int ace_set_mac_addr(struct net_device *dev, void *p) struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; struct sockaddr *addr=p; - u8 *da; + const u8 *da; struct cmd cmd; if(netif_running(dev)) @@ -2720,7 +2720,7 @@ static int ace_set_mac_addr(struct net_device *dev, void *p) eth_hw_addr_set(dev, addr->sa_data); - da = (u8 *)dev->dev_addr; + da = (const u8 *)dev->dev_addr; writel(da[0] << 8 | da[1], ®s->MacAddrHi); writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5], diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 7b75b0cd7ac9..d75d95a97dd9 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -849,7 +849,7 @@ static int init_phy(struct net_device *dev) return 0; } -static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) +static void tse_update_mac_addr(struct altera_tse_private *priv, const u8 *addr) { u32 msb; u32 lsb; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 2c07d15c8dfe..30ee5329bd7c 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -529,7 +529,8 @@ static void mace_write(mace_private *lp, unsigned int ioaddr, int reg, mace_init Resets the MACE chip. ---------------------------------------------------------------------------- */ -static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr) +static int mace_init(mace_private *lp, unsigned int ioaddr, + const char *enet_addr) { int i; int ct = 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index d5fd49dd25f3..3936543a74d8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1080,7 +1080,7 @@ static int xgbe_add_mac_addresses(struct xgbe_prv_data *pdata) return 0; } -static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, u8 *addr) +static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, const u8 *addr) { unsigned int mac_addr_hi, mac_addr_lo; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 3305979a9f7c..607a2c90513b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -729,7 +729,7 @@ struct xgbe_ext_stats { struct xgbe_hw_if { int (*tx_complete)(struct xgbe_ring_desc *); - int (*set_mac_address)(struct xgbe_prv_data *, u8 *addr); + int (*set_mac_address)(struct xgbe_prv_data *, const u8 *addr); int (*config_rx_mode)(struct xgbe_prv_data *); int (*enable_rx_csum)(struct xgbe_prv_data *); diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.c b/drivers/net/ethernet/apm/xgene-v2/mac.c index 2da979e4fad1..6423e22e05b2 100644 --- a/drivers/net/ethernet/apm/xgene-v2/mac.c +++ b/drivers/net/ethernet/apm/xgene-v2/mac.c @@ -65,7 +65,7 @@ void xge_mac_set_speed(struct xge_pdata *pdata) void xge_mac_set_station_addr(struct xge_pdata *pdata) { - u8 *dev_addr = pdata->ndev->dev_addr; + const u8 *dev_addr = pdata->ndev->dev_addr; u32 addr0, addr1; addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 5f657879134e..e641dbbea1e2 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -378,8 +378,8 @@ u32 xgene_enet_rd_stat(struct xgene_enet_pdata *pdata, u32 rd_addr) static void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata) { + const u8 *dev_addr = pdata->ndev->dev_addr; u32 addr0, addr1; - u8 *dev_addr = pdata->ndev->dev_addr; addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | (dev_addr[1] << 8) | dev_addr[0]; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index f482ced2cadd..72b5e8eb0ec7 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -165,8 +165,8 @@ static void xgene_sgmac_reset(struct xgene_enet_pdata *p) static void xgene_sgmac_set_mac_addr(struct xgene_enet_pdata *p) { + const u8 *dev_addr = p->ndev->dev_addr; u32 addr0, addr1; - u8 *dev_addr = p->ndev->dev_addr; addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | (dev_addr[1] << 8) | dev_addr[0]; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 304b5d43f236..86607b79c09f 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -207,8 +207,8 @@ static void xgene_pcs_reset(struct xgene_enet_pdata *pdata) static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) { + const u8 *dev_addr = pdata->ndev->dev_addr; u32 addr0, addr1; - u8 *dev_addr = pdata->ndev->dev_addr; addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | (dev_addr[1] << 8) | dev_addr[0]; diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index a989d2df59ad..a63ec2005af3 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -308,7 +308,7 @@ bmac_init_registers(struct net_device *dev) { struct bmac_data *bp = netdev_priv(dev); volatile unsigned short regValue; - unsigned short *pWord16; + const unsigned short *pWord16; int i; /* XXDEBUG(("bmac: enter init_registers\n")); */ @@ -371,7 +371,7 @@ bmac_init_registers(struct net_device *dev) bmwrite(dev, BHASH1, bp->hash_table_mask[2]); /* bits 47 - 32 */ bmwrite(dev, BHASH0, bp->hash_table_mask[3]); /* bits 63 - 48 */ - pWord16 = (unsigned short *)dev->dev_addr; + pWord16 = (const unsigned short *)dev->dev_addr; bmwrite(dev, MADD0, *pWord16++); bmwrite(dev, MADD1, *pWord16++); bmwrite(dev, MADD2, *pWord16); @@ -522,7 +522,7 @@ static int bmac_set_address(struct net_device *dev, void *addr) { struct bmac_data *bp = netdev_priv(dev); unsigned char *p = addr; - unsigned short *pWord16; + const unsigned short *pWord16; unsigned long flags; int i; @@ -533,7 +533,7 @@ static int bmac_set_address(struct net_device *dev, void *addr) dev->dev_addr[i] = p[i]; } /* load up the hardware address */ - pWord16 = (unsigned short *)dev->dev_addr; + pWord16 = (const unsigned short *)dev->dev_addr; bmwrite(dev, MADD0, *pWord16++); bmwrite(dev, MADD1, *pWord16++); bmwrite(dev, MADD2, *pWord16); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index bed481816ea3..062a300a566a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -217,7 +217,7 @@ struct aq_hw_ops { int (*hw_ring_tx_head_update)(struct aq_hw_s *self, struct aq_ring_s *aq_ring); - int (*hw_set_mac_address)(struct aq_hw_s *self, u8 *mac_addr); + int (*hw_set_mac_address)(struct aq_hw_s *self, const u8 *mac_addr); int (*hw_soft_reset)(struct aq_hw_s *self); @@ -226,7 +226,7 @@ struct aq_hw_ops { int (*hw_reset)(struct aq_hw_s *self); - int (*hw_init)(struct aq_hw_s *self, u8 *mac_addr); + int (*hw_init)(struct aq_hw_s *self, const u8 *mac_addr); int (*hw_start)(struct aq_hw_s *self); @@ -373,7 +373,7 @@ struct aq_fw_ops { int (*set_phyloopback)(struct aq_hw_s *self, u32 mode, bool enable); int (*set_power)(struct aq_hw_s *self, unsigned int power_state, - u8 *mac); + const u8 *mac); int (*send_fw_request)(struct aq_hw_s *self, const struct hw_fw_request_iface *fw_req, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 4a6dfac857ca..02058fe79f52 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -35,7 +35,7 @@ static int aq_apply_macsec_cfg(struct aq_nic_s *nic); static int aq_apply_secy_cfg(struct aq_nic_s *nic, const struct macsec_secy *secy); -static void aq_ether_addr_to_mac(u32 mac[2], unsigned char *emac) +static void aq_ether_addr_to_mac(u32 mac[2], const unsigned char *emac) { u32 tmp[2] = { 0 }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 611875ef2cd1..4625ccb79499 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -322,7 +322,7 @@ static int hw_atl_a0_hw_init_rx_path(struct aq_hw_s *self) return aq_hw_err_from_flags(self); } -static int hw_atl_a0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr) +static int hw_atl_a0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr) { unsigned int h = 0U; unsigned int l = 0U; @@ -348,7 +348,7 @@ err_exit: return err; } -static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr) +static int hw_atl_a0_hw_init(struct aq_hw_s *self, const u8 *mac_addr) { static u32 aq_hw_atl_igcr_table_[4][2] = { [AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U }, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 9f1b15077e7d..d875ce3ec759 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -533,7 +533,7 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self) return aq_hw_err_from_flags(self); } -int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr) +int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr) { unsigned int h = 0U; unsigned int l = 0U; @@ -558,7 +558,7 @@ err_exit: return err; } -static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr) +static int hw_atl_b0_hw_init(struct aq_hw_s *self, const u8 *mac_addr) { static u32 aq_hw_atl_igcr_table_[4][2] = { [AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U }, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h index d8db972113ec..5298846dd9f7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h @@ -58,7 +58,7 @@ int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring); void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self); -int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr); +int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr); int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc); int hw_atl_b0_set_loopback(struct aq_hw_s *self, u32 mode, bool enable); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 404cbf60d3f2..fc0e66006644 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -944,7 +944,7 @@ u32 hw_atl_utils_get_fw_version(struct aq_hw_s *self) } static int aq_fw1x_set_wake_magic(struct aq_hw_s *self, bool wol_enabled, - u8 *mac) + const u8 *mac) { struct hw_atl_utils_fw_rpc *prpc = NULL; unsigned int rpc_size = 0U; @@ -987,7 +987,7 @@ err_exit: } static int aq_fw1x_set_power(struct aq_hw_s *self, unsigned int power_state, - u8 *mac) + const u8 *mac) { struct hw_atl_utils_fw_rpc *prpc = NULL; unsigned int rpc_size = 0U; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index ee0c22d04935..eac631c45c56 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -358,7 +358,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp) return 0; } -static int aq_fw2x_set_wol(struct aq_hw_s *self, u8 *mac) +static int aq_fw2x_set_wol(struct aq_hw_s *self, const u8 *mac) { struct hw_atl_utils_fw_rpc *rpc = NULL; struct offload_info *info = NULL; @@ -404,7 +404,7 @@ err_exit: } static int aq_fw2x_set_power(struct aq_hw_s *self, unsigned int power_state, - u8 *mac) + const u8 *mac) { int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index 92f64048bf69..c98708bb044c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -516,7 +516,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self) return aq_hw_err_from_flags(self); } -static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr) +static int hw_atl2_hw_init(struct aq_hw_s *self, const u8 *mac_addr) { static u32 aq_hw_atl2_igcr_table_[4][2] = { [AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U }, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 55c9e6fcb471..969591bbc066 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -218,7 +218,8 @@ static inline void __b44_cam_read(struct b44 *bp, unsigned char *data, int index data[1] = (val >> 0) & 0xFF; } -static inline void __b44_cam_write(struct b44 *bp, unsigned char *data, int index) +static inline void __b44_cam_write(struct b44 *bp, + const unsigned char *data, int index) { u32 val; @@ -1507,7 +1508,8 @@ static void bwfilter_table(struct b44 *bp, u8 *pp, u32 bytes, u32 table_offset) } } -static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) +static int b44_magic_pattern(const u8 *macaddr, u8 *ppattern, u8 *pmask, + int offset) { int magicsync = 6; int k, j, len = offset; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b813745e8de3..40933bf5a710 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1818,7 +1818,7 @@ static inline void umac_reset(struct bcm_sysport_priv *priv) } static void umac_set_hw_addr(struct bcm_sysport_priv *priv, - unsigned char *addr) + const unsigned char *addr) { u32 mac0 = (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index d2c7834850cc..7b525c65bacb 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -768,7 +768,7 @@ static void bgmac_umac_cmd_maskset(struct bgmac *bgmac, u32 mask, u32 set, udelay(2); } -static void bgmac_write_mac_address(struct bgmac *bgmac, u8 *addr) +static void bgmac_write_mac_address(struct bgmac *bgmac, const u8 *addr) { u32 tmp; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 248b81249cb0..babc955ba64e 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2704,7 +2704,7 @@ bnx2_alloc_bad_rbuf(struct bnx2 *bp) } static void -bnx2_set_mac_addr(struct bnx2 *bp, u8 *mac_addr, u32 pos) +bnx2_set_mac_addr(struct bnx2 *bp, const u8 *mac_addr, u32 pos) { u32 val; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e789430f407c..2b06d78baa08 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1994,7 +1994,7 @@ int bnx2x_idle_chk(struct bnx2x *bp); * operation has been successfully scheduled and a negative - if a requested * operations has failed. */ -int bnx2x_set_mac_one(struct bnx2x *bp, u8 *mac, +int bnx2x_set_mac_one(struct bnx2x *bp, const u8 *mac, struct bnx2x_vlan_mac_obj *obj, bool set, int mac_type, unsigned long *ramrod_flags); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index ef49e38ae027..27e712178f95 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8417,7 +8417,7 @@ alloc_mem_err: * Init service functions */ -int bnx2x_set_mac_one(struct bnx2x *bp, u8 *mac, +int bnx2x_set_mac_one(struct bnx2x *bp, const u8 *mac, struct bnx2x_vlan_mac_obj *obj, bool set, int mac_type, unsigned long *ramrod_flags) { @@ -9146,7 +9146,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode) else if (bp->wol) { u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; - u8 *mac_addr = bp->dev->dev_addr; + const u8 *mac_addr = bp->dev->dev_addr; struct pci_dev *pdev = bp->pdev; u32 val; u16 pmc; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 966d5722c5e2..8c2cf5519787 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -508,7 +508,8 @@ int bnx2x_vfpf_init(struct bnx2x *bp); void bnx2x_vfpf_close_vf(struct bnx2x *bp); int bnx2x_vfpf_setup_q(struct bnx2x *bp, struct bnx2x_fastpath *fp, bool is_leading); -int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set); +int bnx2x_vfpf_config_mac(struct bnx2x *bp, const u8 *addr, u8 vf_qid, + bool set); int bnx2x_vfpf_config_rss(struct bnx2x *bp, struct bnx2x_config_rss_params *params); int bnx2x_vfpf_set_mcast(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 7c96f943c6f3..c9129b9ba446 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -721,7 +721,7 @@ out: } /* request pf to add a mac for the vf */ -int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set) +int bnx2x_vfpf_config_mac(struct bnx2x *bp, const u8 *addr, u8 vf_qid, bool set) { struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 800020f4d79d..66263aa0d96b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4869,7 +4869,7 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, #endif static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx, - u8 *mac_addr) + const u8 *mac_addr) { struct hwrm_cfa_l2_filter_alloc_output *resp; struct hwrm_cfa_l2_filter_alloc_input *req; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index d4ebc5d710ba..1d177fed44a6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -1151,7 +1151,7 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp) } } -int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict) +int bnxt_approve_mac(struct bnxt *bp, const u8 *mac, bool strict) { struct hwrm_func_vf_cfg_input *req; int rc = 0; @@ -1246,7 +1246,7 @@ void bnxt_update_vf_mac(struct bnxt *bp) { } -int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict) +int bnxt_approve_mac(struct bnxt *bp, const u8 *mac, bool strict) { return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 995535e4c11b..9a4bacba477b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -41,5 +41,5 @@ int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset); void bnxt_sriov_disable(struct bnxt *); void bnxt_hwrm_exec_fwd_req(struct bnxt *); void bnxt_update_vf_mac(struct bnxt *); -int bnxt_approve_mac(struct bnxt *, u8 *, bool); +int bnxt_approve_mac(struct bnxt *, const u8 *, bool); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 83c55e7b099f..ed53859b6f7d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3266,7 +3266,7 @@ static void bcmgenet_umac_reset(struct bcmgenet_priv *priv) } static void bcmgenet_set_hw_addr(struct bcmgenet_priv *priv, - unsigned char *addr) + const unsigned char *addr) { bcmgenet_umac_writel(priv, get_unaligned_be32(&addr[0]), UMAC_MAC0); bcmgenet_umac_writel(priv, get_unaligned_be16(&addr[4]), UMAC_MAC1); @@ -3560,7 +3560,7 @@ static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) #define MAX_MDF_FILTER 17 static inline void bcmgenet_set_mdf_addr(struct bcmgenet_priv *priv, - unsigned char *addr, + const unsigned char *addr, int *i) { bcmgenet_umac_writel(priv, addr[0] << 8 | addr[1], diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index dc51f36fdf2a..9ad89a53c3e6 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -607,7 +607,7 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr) writel(value, ioaddr + XGMAC_CONTROL); } -static void xgmac_set_mac_addr(void __iomem *ioaddr, unsigned char *addr, +static void xgmac_set_mac_addr(void __iomem *ioaddr, const unsigned char *addr, int num) { u32 data; diff --git a/drivers/net/ethernet/chelsio/cxgb/gmac.h b/drivers/net/ethernet/chelsio/cxgb/gmac.h index dfa77491a910..5913eaf442b5 100644 --- a/drivers/net/ethernet/chelsio/cxgb/gmac.h +++ b/drivers/net/ethernet/chelsio/cxgb/gmac.h @@ -117,7 +117,7 @@ struct cmac_ops { const struct cmac_statistics *(*statistics_update)(struct cmac *, int); int (*macaddress_get)(struct cmac *, u8 mac_addr[6]); - int (*macaddress_set)(struct cmac *, u8 mac_addr[6]); + int (*macaddress_set)(struct cmac *, const u8 mac_addr[6]); }; typedef struct _cmac_instance cmac_instance; diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c index c27908e66f5e..0bb37e4680c7 100644 --- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c +++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c @@ -496,7 +496,7 @@ static int pm3393_macaddress_get(struct cmac *cmac, u8 mac_addr[6]) return 0; } -static int pm3393_macaddress_set(struct cmac *cmac, u8 ma[6]) +static int pm3393_macaddress_set(struct cmac *cmac, const u8 ma[6]) { u32 val, lo, mid, hi, enabled = cmac->instance->enabled; diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c index a19284bdb80e..2ad3efb550c2 100644 --- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c +++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c @@ -379,7 +379,7 @@ static int mac_intr_clear(struct cmac *mac) } /* Expect MAC address to be in network byte order. */ -static int mac_set_address(struct cmac* mac, u8 addr[6]) +static int mac_set_address(struct cmac* mac, const u8 addr[6]) { u32 val; int port = mac->instance->index; diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index b706f2fbe4f4..a309016f7f8c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -710,7 +710,7 @@ int t3_mac_enable(struct cmac *mac, int which); int t3_mac_disable(struct cmac *mac, int which); int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu); int t3_mac_set_rx_mode(struct cmac *mac, struct net_device *dev); -int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6]); +int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6]); int t3_mac_set_num_ucast(struct cmac *mac, int n); const struct mac_stats *t3_mac_update_stats(struct cmac *mac); int t3_mac_set_speed_duplex_fc(struct cmac *mac, int speed, int duplex, int fc); diff --git a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c index 3af19a550372..1bdc6cad1e49 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c +++ b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c @@ -240,7 +240,7 @@ static void set_addr_filter(struct cmac *mac, int idx, const u8 * addr) } /* Set one of the station's unicast MAC addresses. */ -int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6]) +int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6]) { if (idx >= mac->nucast) return -EINVAL; diff --git a/drivers/net/ethernet/cisco/enic/enic_pp.c b/drivers/net/ethernet/cisco/enic/enic_pp.c index e6a83198c3dd..80f46dbd5117 100644 --- a/drivers/net/ethernet/cisco/enic/enic_pp.c +++ b/drivers/net/ethernet/cisco/enic/enic_pp.c @@ -73,9 +73,9 @@ static int enic_set_port_profile(struct enic *enic, int vf) struct vic_provinfo *vp; const u8 oui[3] = VIC_PROVINFO_CISCO_OUI; const __be16 os_type = htons(VIC_GENERIC_PROV_OS_TYPE_LINUX); + const u8 *client_mac; char uuid_str[38]; char client_mac_str[18]; - u8 *client_mac; int err; ENIC_PP_BY_INDEX(enic, vf, pp, &err); diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 202ecb132053..993bba0ffb16 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -567,7 +567,7 @@ static void rio_hw_init(struct net_device *dev) */ for (i = 0; i < 3; i++) dw16(StationAddr0 + 2 * i, - cpu_to_le16(((u16 *)dev->dev_addr)[i])); + cpu_to_le16(((const u16 *)dev->dev_addr)[i])); set_multicast (dev); if (np->coalesce) { diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 6c51cf991dad..3ed21ba4eb99 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -60,11 +60,11 @@ static void __dnet_set_hwaddr(struct dnet *bp) { u16 tmp; - tmp = be16_to_cpup((__be16 *)bp->dev->dev_addr); + tmp = be16_to_cpup((const __be16 *)bp->dev->dev_addr); dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_0_REG, tmp); - tmp = be16_to_cpup((__be16 *)(bp->dev->dev_addr + 2)); + tmp = be16_to_cpup((const __be16 *)(bp->dev->dev_addr + 2)); dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_1_REG, tmp); - tmp = be16_to_cpup((__be16 *)(bp->dev->dev_addr + 4)); + tmp = be16_to_cpup((const __be16 *)(bp->dev->dev_addr + 4)); dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_2_REG, tmp); } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 649c5c429bd7..528eb0f223b1 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1080,7 +1080,7 @@ err: } /* Uses synchronous MCCQ */ -int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, +int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, u32 if_id, u32 *pmac_id, u32 domain) { struct be_mcc_wrb *wrb; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index c30d6d6f0f3a..db1f3b908582 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2385,7 +2385,7 @@ int be_pci_fnum_get(struct be_adapter *adapter); int be_fw_wait_ready(struct be_adapter *adapter); int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, bool permanent, u32 if_handle, u32 pmac_id); -int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, u32 if_id, +int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, u32 if_id, u32 *pmac_id, u32 domain); int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 domain); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 736a6ea86eb1..d51f24c9e1b8 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -272,7 +272,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) iowrite32(val, adapter->db + DB_CQ_OFFSET); } -static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) +static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac) { int i; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index cd3a3b8f23b6..ed2ef167cdb2 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -802,8 +802,8 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void ethoc_do_set_mac_address(struct net_device *dev) { + const unsigned char *mac = dev->dev_addr; struct ethoc *priv = netdev_priv(dev); - unsigned char *mac = dev->dev_addr; ethoc_write(priv, MAC_ADDR0, (mac[2] << 24) | (mac[3] << 16) | (mac[4] << 8) | (mac[5] << 0)); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 25c91b3c5fd3..63c935e7b625 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -827,7 +827,7 @@ static int netdev_open(struct net_device *dev) return -EAGAIN; for (i = 0; i < 3; i++) - iowrite16(((unsigned short*)dev->dev_addr)[i], + iowrite16(((const unsigned short *)dev->dev_addr)[i], ioaddr + PAR0 + i*2); init_ring(dev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index cd8a7d94f60c..6b2927d863e2 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -272,7 +272,7 @@ static int dpaa_netdev_init(struct net_device *net_dev, } else { eth_hw_addr_random(net_dev); err = priv->mac_dev->change_addr(priv->mac_dev->fman_mac, - (enet_addr_t *)net_dev->dev_addr); + (const enet_addr_t *)net_dev->dev_addr); if (err) { dev_err(dev, "Failed to set random MAC address\n"); return -EINVAL; @@ -452,7 +452,7 @@ static int dpaa_set_mac_address(struct net_device *net_dev, void *addr) mac_dev = priv->mac_dev; err = mac_dev->change_addr(mac_dev->fman_mac, - (enet_addr_t *)net_dev->dev_addr); + (const enet_addr_t *)net_dev->dev_addr); if (err < 0) { netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n", err); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index bce3c9398887..1950a8936bc0 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -366,7 +366,7 @@ static void set_dflts(struct dtsec_cfg *cfg) cfg->maximum_frame = DEFAULT_MAXIMUM_FRAME; } -static void set_mac_address(struct dtsec_regs __iomem *regs, u8 *adr) +static void set_mac_address(struct dtsec_regs __iomem *regs, const u8 *adr) { u32 tmp; @@ -516,7 +516,7 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg, if (addr) { MAKE_ENET_ADDR_FROM_UINT64(addr, eth_addr); - set_mac_address(regs, (u8 *)eth_addr); + set_mac_address(regs, (const u8 *)eth_addr); } /* HASH */ @@ -1022,7 +1022,7 @@ int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en) return 0; } -int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr) +int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_addr) { struct dtsec_regs __iomem *regs = dtsec->regs; enum comm_mode mode = COMM_MODE_NONE; @@ -1041,7 +1041,7 @@ int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr) * Station address have to be swapped (big endian to little endian */ dtsec->addr = ENET_ADDR_TO_UINT64(*enet_addr); - set_mac_address(dtsec->regs, (u8 *)(*enet_addr)); + set_mac_address(dtsec->regs, (const u8 *)(*enet_addr)); graceful_start(dtsec, mode); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h index 5149d96ec2c1..68512c3bd6e5 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h @@ -37,7 +37,7 @@ struct fman_mac *dtsec_config(struct fman_mac_params *params); int dtsec_set_promiscuous(struct fman_mac *dtsec, bool new_val); -int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr); +int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_addr); int dtsec_adjust_link(struct fman_mac *dtsec, u16 speed); int dtsec_restart_autoneg(struct fman_mac *dtsec); diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 62f42921933d..2216b7f51d26 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -354,7 +354,7 @@ struct fman_mac { bool allmulti_enabled; }; -static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr, +static void add_addr_in_paddr(struct memac_regs __iomem *regs, const u8 *adr, u8 paddr_num) { u32 tmp0, tmp1; @@ -897,12 +897,12 @@ int memac_accept_rx_pause_frames(struct fman_mac *memac, bool en) return 0; } -int memac_modify_mac_address(struct fman_mac *memac, enet_addr_t *enet_addr) +int memac_modify_mac_address(struct fman_mac *memac, const enet_addr_t *enet_addr) { if (!is_init_done(memac->memac_drv_param)) return -EINVAL; - add_addr_in_paddr(memac->regs, (u8 *)(*enet_addr), 0); + add_addr_in_paddr(memac->regs, (const u8 *)(*enet_addr), 0); return 0; } @@ -1058,7 +1058,7 @@ int memac_init(struct fman_mac *memac) /* MAC Address */ if (memac->addr != 0) { MAKE_ENET_ADDR_FROM_UINT64(memac->addr, eth_addr); - add_addr_in_paddr(memac->regs, (u8 *)eth_addr, 0); + add_addr_in_paddr(memac->regs, (const u8 *)eth_addr, 0); } fixed_link = memac_drv_param->fixed_link; diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h index b2c671ec0ce7..3820f7a22983 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.h +++ b/drivers/net/ethernet/freescale/fman/fman_memac.h @@ -40,7 +40,7 @@ struct fman_mac *memac_config(struct fman_mac_params *params); int memac_set_promiscuous(struct fman_mac *memac, bool new_val); -int memac_modify_mac_address(struct fman_mac *memac, enet_addr_t *enet_addr); +int memac_modify_mac_address(struct fman_mac *memac, const enet_addr_t *enet_addr); int memac_adjust_link(struct fman_mac *memac, u16 speed); int memac_cfg_max_frame_len(struct fman_mac *memac, u16 new_val); int memac_cfg_reset_on_init(struct fman_mac *memac, bool enable); diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c index 41946b16f6c7..311c1906e044 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.c +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c @@ -221,7 +221,7 @@ struct fman_mac { bool allmulti_enabled; }; -static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr) +static void set_mac_address(struct tgec_regs __iomem *regs, const u8 *adr) { u32 tmp0, tmp1; @@ -514,13 +514,13 @@ int tgec_accept_rx_pause_frames(struct fman_mac *tgec, bool en) return 0; } -int tgec_modify_mac_address(struct fman_mac *tgec, enet_addr_t *p_enet_addr) +int tgec_modify_mac_address(struct fman_mac *tgec, const enet_addr_t *p_enet_addr) { if (!is_init_done(tgec->cfg)) return -EINVAL; tgec->addr = ENET_ADDR_TO_UINT64(*p_enet_addr); - set_mac_address(tgec->regs, (u8 *)(*p_enet_addr)); + set_mac_address(tgec->regs, (const u8 *)(*p_enet_addr)); return 0; } @@ -704,7 +704,7 @@ int tgec_init(struct fman_mac *tgec) if (tgec->addr) { MAKE_ENET_ADDR_FROM_UINT64(tgec->addr, eth_addr); - set_mac_address(tgec->regs, (u8 *)eth_addr); + set_mac_address(tgec->regs, (const u8 *)eth_addr); } /* interrupts */ diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h index 3bfd1062b386..b28b20b26148 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.h +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h @@ -37,7 +37,7 @@ struct fman_mac *tgec_config(struct fman_mac_params *params); int tgec_set_promiscuous(struct fman_mac *tgec, bool new_val); -int tgec_modify_mac_address(struct fman_mac *tgec, enet_addr_t *enet_addr); +int tgec_modify_mac_address(struct fman_mac *tgec, const enet_addr_t *enet_addr); int tgec_cfg_max_frame_len(struct fman_mac *tgec, u16 new_val); int tgec_enable(struct fman_mac *tgec, enum comm_mode mode); int tgec_disable(struct fman_mac *tgec, enum comm_mode mode); diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index 824a81a9f350..daa285a9b8b2 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -66,7 +66,7 @@ struct mac_device { int (*stop)(struct mac_device *mac_dev); void (*adjust_link)(struct mac_device *mac_dev); int (*set_promisc)(struct fman_mac *mac_dev, bool enable); - int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr); + int (*change_addr)(struct fman_mac *mac_dev, const enet_addr_t *enet_addr); int (*set_allmulti)(struct fman_mac *mac_dev, bool enable); int (*set_tstamp)(struct fman_mac *mac_dev, bool enable); int (*set_multi)(struct net_device *net_dev, diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 29190eb890c8..a6c18b6527f9 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -427,7 +427,7 @@ static void hisi_femac_free_skb_rings(struct hisi_femac_priv *priv) } static int hisi_femac_set_hw_mac_addr(struct hisi_femac_priv *priv, - unsigned char *mac) + const unsigned char *mac) { u32 reg; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 5f7ccdc834b7..d7e62eca050f 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -429,7 +429,7 @@ static void hix5hd2_port_disable(struct hix5hd2_priv *priv) static void hix5hd2_hw_set_mac_addr(struct net_device *dev) { struct hix5hd2_priv *priv = netdev_priv(dev); - unsigned char *mac = dev->dev_addr; + const unsigned char *mac = dev->dev_addr; u32 val; val = mac[1] | (mac[0] << 8); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index d46e8f999019..d72657444ef3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -499,7 +499,7 @@ struct hnae_ae_ops { u32 *tx_usecs_high, u32 *rx_usecs_high); void (*set_promisc_mode)(struct hnae_handle *handle, u32 en); int (*get_mac_addr)(struct hnae_handle *handle, void **p); - int (*set_mac_addr)(struct hnae_handle *handle, void *p); + int (*set_mac_addr)(struct hnae_handle *handle, const void *p); int (*add_uc_addr)(struct hnae_handle *handle, const unsigned char *addr); int (*rm_uc_addr)(struct hnae_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index e81116ad9bdf..bc3e406f0139 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -206,7 +206,7 @@ static void hns_ae_fini_queue(struct hnae_queue *q) hns_rcb_reset_ring_hw(q); } -static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p) +static int hns_ae_set_mac_address(struct hnae_handle *handle, const void *p) { int ret; struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index f387a859a201..8f391e2adcc0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -450,7 +450,7 @@ static void hns_gmac_update_stats(void *mac_drv) += dsaf_read_dev(drv, GMAC_TX_PAUSE_FRAMES_REG); } -static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr) +static void hns_gmac_set_mac_addr(void *mac_drv, const char *mac_addr) { struct mac_driver *drv = (struct mac_driver *)mac_drv; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index f41379de2186..7edf8569514c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -240,7 +240,7 @@ int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num) *@addr:mac address */ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, - u32 vmid, char *addr) + u32 vmid, const char *addr) { int ret; struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 8943ffab4418..e3bb05959ba9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -348,7 +348,7 @@ struct mac_driver { /*disable mac when disable nic or dsaf*/ void (*mac_disable)(void *mac_drv, enum mac_commom_mode mode); /* config mac address*/ - void (*set_mac_addr)(void *mac_drv, char *mac_addr); + void (*set_mac_addr)(void *mac_drv, const char *mac_addr); /*adjust mac mode of port,include speed and duplex*/ int (*adjust_link)(void *mac_drv, enum mac_speed speed, u32 full_duplex); @@ -425,7 +425,8 @@ int hns_mac_init(struct dsaf_device *dsaf_dev); void mac_adjust_link(struct net_device *net_dev); bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex); void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status); -int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr); +int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, + const char *addr); int hns_mac_set_multi(struct hns_mac_cb *mac_cb, u32 port_num, char *addr, bool enable); int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index 401fef5f1d07..fc26ffaae620 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -255,7 +255,7 @@ static void hns_xgmac_pausefrm_cfg(void *mac_drv, u32 rx_en, u32 tx_en) dsaf_write_dev(drv, XGMAC_MAC_PAUSE_CTRL_REG, origin); } -static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, char *mac_addr) +static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, const char *mac_addr) { struct mac_driver *drv = (struct mac_driver *)mac_drv; diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 5d188573c433..98d63e804903 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -595,7 +595,7 @@ struct hnae3_ae_ops { u32 *tx_usecs_high, u32 *rx_usecs_high); void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p); - int (*set_mac_addr)(struct hnae3_handle *handle, void *p, + int (*set_mac_addr)(struct hnae3_handle *handle, const void *p, bool is_first); int (*do_ioctl)(struct hnae3_handle *handle, struct ifreq *ifr, int cmd); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cd981b33d4ff..7bb34af3981e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9442,7 +9442,7 @@ int hclge_update_mac_node_for_dev_addr(struct hclge_vport *vport, return 0; } -static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, +static int hclge_set_mac_addr(struct hnae3_handle *handle, const void *p, bool is_first) { const unsigned char *new_addr = (const unsigned char *)p; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5fdac8685f95..2e6dcf75fba6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1349,7 +1349,7 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p) ether_addr_copy(p, hdev->hw.mac.mac_addr); } -static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p, +static int hclgevf_set_mac_addr(struct hnae3_handle *handle, const void *p, bool is_first) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 0696f723228a..18d32302c3c7 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -461,7 +461,7 @@ static int init586(struct net_device *dev) ias_cmd->cmd_cmd = swab16(CMD_IASETUP | CMD_LAST); ias_cmd->cmd_link = 0xffff; - memcpy((char *)&ias_cmd->iaddr,(char *) dev->dev_addr,ETH_ALEN); + memcpy((char *)&ias_cmd->iaddr,(const char *) dev->dev_addr,ETH_ALEN); p->scb->cbl_offset = make16(ias_cmd); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 39fb3d57c057..3d528fba754b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -435,7 +435,7 @@ static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) return !!ch->fwd; } -static inline u8 *i40e_channel_mac(struct i40e_channel *ch) +static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) { if (i40e_is_channel_macvlan(ch)) return ch->fwd->netdev->dev_addr; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c index a430871d1c27..c8d1e815ec6b 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c @@ -549,7 +549,7 @@ ixgb_mta_set(struct ixgb_hw *hw, *****************************************************************************/ void ixgb_rar_set(struct ixgb_hw *hw, - u8 *addr, + const u8 *addr, u32 index) { u32 rar_low, rar_high; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h index 6064583095da..70bcff5fb3db 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h @@ -740,7 +740,7 @@ bool ixgb_adapter_start(struct ixgb_hw *hw); void ixgb_check_for_link(struct ixgb_hw *hw); bool ixgb_check_for_bad_link(struct ixgb_hw *hw); -void ixgb_rar_set(struct ixgb_hw *hw, u8 *addr, u32 index); +void ixgb_rar_set(struct ixgb_hw *hw, const u8 *addr, u32 index); /* Filters (multicast, vlan, receive) */ void ixgb_mc_addr_list_update(struct ixgb_hw *hw, u8 *mc_addr_list, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 654ec25e6705..a63d9a5c8059 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1770,7 +1770,7 @@ static void uc_addr_get(struct mv643xx_eth_private *mp, unsigned char *addr) addr[5] = mac_l & 0xff; } -static void uc_addr_set(struct mv643xx_eth_private *mp, unsigned char *addr) +static void uc_addr_set(struct mv643xx_eth_private *mp, const u8 *addr) { wrlp(mp, MAC_ADDR_HIGH, (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 761155af25d8..e2ce84ecb7a6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1623,8 +1623,8 @@ static void mvneta_set_ucast_addr(struct mvneta_port *pp, u8 last_nibble, } /* Set mac address */ -static void mvneta_mac_addr_set(struct mvneta_port *pp, unsigned char *addr, - int queue) +static void mvneta_mac_addr_set(struct mvneta_port *pp, + const unsigned char *addr, int queue) { unsigned int mac_h; unsigned int mac_l; diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 898b513f74e0..bb5341020803 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -389,7 +389,7 @@ static void inverse_every_nibble(unsigned char *mac_addr) * Outputs * return the calculated entry. */ -static u32 hash_function(unsigned char *mac_addr_orig) +static u32 hash_function(const unsigned char *mac_addr_orig) { u32 hash_result; u32 addr0; @@ -434,7 +434,7 @@ static u32 hash_function(unsigned char *mac_addr_orig) * -ENOSPC if table full */ static int add_del_hash_entry(struct pxa168_eth_private *pep, - unsigned char *mac_addr, + const unsigned char *mac_addr, u32 rd, u32 skip, int del) { struct addr_table_entry *entry, *start; @@ -521,7 +521,7 @@ static int add_del_hash_entry(struct pxa168_eth_private *pep, */ static void update_hash_table_mac_address(struct pxa168_eth_private *pep, unsigned char *oaddr, - unsigned char *addr) + const unsigned char *addr) { /* Delete old entry */ if (oaddr) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index e2ebfd8115a0..89ca7960b225 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -523,7 +523,7 @@ static void mtk_star_dma_resume_tx(struct mtk_star_priv *priv) static void mtk_star_set_mac_addr(struct net_device *ndev) { struct mtk_star_priv *priv = netdev_priv(ndev); - u8 *mac_addr = ndev->dev_addr; + const u8 *mac_addr = ndev->dev_addr; unsigned int high, low; high = mac_addr[0] << 8 | mac_addr[1] << 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index c06b4b938ae7..73a377c8a2da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -71,12 +71,12 @@ struct mlx5e_l2_hash_node { bool mpfs; }; -static inline int mlx5e_hash_l2(u8 *addr) +static inline int mlx5e_hash_l2(const u8 *addr) { return addr[5]; } -static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr) +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr) { struct mlx5e_l2_hash_node *hn; int ix = mlx5e_hash_l2(addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 269ebb53eda6..f7ebc1f9283f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -219,7 +219,7 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) { - unsigned char *dev_addr = priv->netdev->dev_addr; + const unsigned char *dev_addr = priv->netdev->dev_addr; u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; struct mlx5i_priv *ipriv = priv->ppriv; diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index 37ccf8c570b5..0f2cdcd4a4c0 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -380,7 +380,7 @@ static void ks8842_read_mac_addr(struct ks8842_adapter *adapter, u8 *dest) } } -static void ks8842_write_mac_addr(struct ks8842_adapter *adapter, u8 *mac) +static void ks8842_write_mac_addr(struct ks8842_adapter *adapter, const u8 *mac) { unsigned long flags; unsigned i; diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index a1f7f45b9d08..03ad8bdc1f0d 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4033,7 +4033,7 @@ static void hw_set_add_addr(struct ksz_hw *hw) } } -static int hw_add_addr(struct ksz_hw *hw, u8 *mac_addr) +static int hw_add_addr(struct ksz_hw *hw, const u8 *mac_addr) { int i; int j = ADDITIONAL_ENTRIES; @@ -4054,7 +4054,7 @@ static int hw_add_addr(struct ksz_hw *hw, u8 *mac_addr) return -1; } -static int hw_del_addr(struct ksz_hw *hw, u8 *mac_addr) +static int hw_del_addr(struct ksz_hw *hw, const u8 *mac_addr) { int i; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 32760f87bf8a..5ae59b1e5b48 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -796,7 +796,8 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) return status; } -static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr) +static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, + const u8 * addr) { struct myri10ge_cmd cmd; int status; diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 5454c1c2f8ad..ea0b2f3fd9c6 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -5217,7 +5217,7 @@ static int s2io_set_mac_addr(struct net_device *dev, void *p) * as defined in errno.h file on failure. */ -static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr) +static int do_s2io_prog_unicast(struct net_device *dev, const u8 *addr) { struct s2io_nic *sp = netdev_priv(dev); register u64 mac_addr = 0, perm_addr = 0; diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 5a6032212c19..a4266d1544ab 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1073,7 +1073,7 @@ static void s2io_reset(struct s2io_nic * sp); static int s2io_poll_msix(struct napi_struct *napi, int budget); static int s2io_poll_inta(struct napi_struct *napi, int budget); static void s2io_init_pci(struct s2io_nic * sp); -static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr); +static int do_s2io_prog_unicast(struct net_device *dev, const u8 *addr); static void s2io_alarm_handle(struct timer_list *t); static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id); diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index ab70179728f6..dfb4468fe287 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -837,7 +837,7 @@ nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) } static int -__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) +__nfp_tunnel_offload_mac(struct nfp_app *app, const u8 *mac, u16 idx, bool del) { struct nfp_tun_mac_addr_offload payload; @@ -886,7 +886,7 @@ static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx) } static struct nfp_tun_offloaded_mac * -nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, u8 *mac) +nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, const u8 *mac) { struct nfp_flower_priv *priv = app->priv; @@ -1005,7 +1005,7 @@ err_free_ida: static int nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, - u8 *mac, bool mod) + const u8 *mac, bool mod) { struct nfp_flower_priv *priv = app->priv; struct nfp_flower_repr_priv *repr_priv; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 43fd569aa5f1..fbfbf94e0377 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -419,7 +419,7 @@ struct netdata_local { /* * MAC support functions */ -static void __lpc_set_mac(struct netdata_local *pldat, u8 *mac) +static void __lpc_set_mac(struct netdata_local *pldat, const u8 *mac) { u32 tmp; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 6823016ab5a3..18f3bf7c4dfe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -952,7 +952,7 @@ qed_llh_remove_filter(struct qed_hwfn *p_hwfn, } int qed_llh_add_mac_filter(struct qed_dev *cdev, - u8 ppfid, u8 mac_addr[ETH_ALEN]) + u8 ppfid, const u8 mac_addr[ETH_ALEN]) { struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 6582bfc1b4a9..f8682356d0cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -364,7 +364,7 @@ int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng); * Return: Int. */ int qed_llh_add_mac_filter(struct qed_dev *cdev, - u8 ppfid, u8 mac_addr[ETH_ALEN]); + u8 ppfid, const u8 mac_addr[ETH_ALEN]); /** * qed_llh_remove_mac_filter(): Remove a LLH MAC filter from the given diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index a116fbc59725..2edd6bf64a3c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2848,7 +2848,7 @@ static int qed_fp_cqe_completion(struct qed_dev *dev, cqe); } -static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac) +static int qed_req_bulletin_update_mac(struct qed_dev *cdev, const u8 *mac) { int i, ret; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 5e7242304ee2..29f9711bf438 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -2887,7 +2887,7 @@ static int qed_update_drv_state(struct qed_dev *cdev, bool active) return status; } -static int qed_update_mac(struct qed_dev *cdev, u8 *mac) +static int qed_update_mac(struct qed_dev *cdev, const u8 *mac) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_ptt *ptt; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 11a52a4a673b..64678a256f3b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -2851,7 +2851,7 @@ int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, } int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *mac) + struct qed_ptt *p_ptt, const u8 *mac) { struct qed_mcp_mb_params mb_params; u32 mfw_mac[2]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 9e1de1191bd4..564723800d15 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -536,7 +536,7 @@ int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, * Return: Int - 0 - Operation was successul. */ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *mac); + struct qed_ptt *p_ptt, const u8 *mac); /** * qed_mcp_ov_update_wol(): Send WOL mode to MFW. diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index fe0bb11d0e43..7f3e84b8622d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1965,7 +1965,7 @@ static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, u8 *old_mac_address, - u8 *new_mac_address) + const u8 *new_mac_address) { int rc = 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 220a95fa96e1..597cd9cd57b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1373,7 +1373,7 @@ exit: int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, - u8 *p_mac) + const u8 *p_mac) { struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; struct vfpf_bulletin_update_mac_tlv *p_req; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 8718760443be..306b5f4bc632 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -1070,7 +1070,7 @@ u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id); * * Return: Int. */ -int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac); +int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, const u8 *p_mac); #else static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn, @@ -1259,7 +1259,7 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn, } static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, - u8 *p_mac) + const u8 *p_mac) { return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 03c51dd37e1f..3010833ddde3 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -617,7 +617,7 @@ void qede_fill_rss_params(struct qede_dev *edev, static int qede_set_ucast_rx_mac(struct qede_dev *edev, enum qed_filter_xcast_params_type opcode, - unsigned char mac[ETH_ALEN]) + const unsigned char mac[ETH_ALEN]) { struct qed_filter_ucast_params ucast; diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 87b8c032195d..06104d2ff5b3 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -420,7 +420,7 @@ static void emac_mac_dma_config(struct emac_adapter *adpt) } /* set MAC address */ -static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr) +static void emac_set_mac_address(struct emac_adapter *adpt, const u8 *addr) { u32 sta; diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 01ef5efd7bc2..a8f282c43a78 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -453,7 +453,7 @@ static void r6040_down(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - u16 *adrp; + const u16 *adrp; /* Stop MAC */ iowrite16(MSK_INT, ioaddr + MIER); /* Mask Off Interrupt */ @@ -462,7 +462,7 @@ static void r6040_down(struct net_device *dev) r6040_reset_mac(lp); /* Restore MAC Address to MIDx */ - adrp = (u16 *) dev->dev_addr; + adrp = (const u16 *) dev->dev_addr; iowrite16(adrp[0], ioaddr + MID_0L); iowrite16(adrp[1], ioaddr + MID_0M); iowrite16(adrp[2], ioaddr + MID_0H); @@ -731,13 +731,13 @@ static void r6040_mac_address(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - u16 *adrp; + const u16 *adrp; /* Reset MAC */ r6040_reset_mac(lp); /* Restore MAC Address */ - adrp = (u16 *) dev->dev_addr; + adrp = (const u16 *) dev->dev_addr; iowrite16(adrp[0], ioaddr + MID_0L); iowrite16(adrp[1], ioaddr + MID_0M); iowrite16(adrp[2], ioaddr + MID_0H); @@ -849,13 +849,13 @@ static void r6040_multicast_list(struct net_device *dev) unsigned long flags; struct netdev_hw_addr *ha; int i; - u16 *adrp; + const u16 *adrp; u16 hash_table[4] = { 0 }; spin_lock_irqsave(&lp->lock, flags); /* Keep our MAC Address */ - adrp = (u16 *)dev->dev_addr; + adrp = (const u16 *)dev->dev_addr; iowrite16(adrp[0], ioaddr + MID_0L); iowrite16(adrp[1], ioaddr + MID_0M); iowrite16(adrp[2], ioaddr + MID_0H); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h index 049dc6cf4611..0f45107db8dd 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h @@ -329,7 +329,7 @@ struct sxgbe_core_ops { /* Set power management mode (e.g. magic frame) */ void (*pmt)(void __iomem *ioaddr, unsigned long mode); /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr)(void __iomem *ioaddr, unsigned char *addr, + void (*set_umac_addr)(void __iomem *ioaddr, const unsigned char *addr, unsigned int reg_n); void (*get_umac_addr)(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c index e96e2bd295ef..7d9f257de92a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c @@ -85,7 +85,8 @@ static void sxgbe_core_pmt(void __iomem *ioaddr, unsigned long mode) } /* Set/Get Unicast MAC addresses */ -static void sxgbe_core_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, +static void sxgbe_core_set_umac_addr(void __iomem *ioaddr, + const unsigned char *addr, unsigned int reg_n) { u32 high_word, low_word; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e7e2223aebbf..cf366ed2557c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1038,7 +1038,7 @@ int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) } int efx_ef10_vport_add_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) + unsigned int port_id, const u8 *mac) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); @@ -1050,7 +1050,7 @@ int efx_ef10_vport_add_mac(struct efx_nic *efx, } int efx_ef10_vport_del_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) + unsigned int port_id, const u8 *mac) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 06d23c708a5f..7f5aa4a8c451 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -480,7 +480,7 @@ static int efx_ef10_vport_del_vf_mac(struct efx_nic *efx, unsigned int port_id, return rc; } -int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) +int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac) { struct efx_ef10_nic_data *nic_data = efx->nic_data; struct ef10_vf *vf; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h index cfe556d17313..3c703ca878b0 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.h +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -39,7 +39,7 @@ static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} void efx_ef10_sriov_fini(struct efx_nic *efx); static inline void efx_ef10_sriov_flr(struct efx_nic *efx, unsigned vf_i) {} -int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac); +int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac); int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, u8 qos); @@ -60,9 +60,9 @@ int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); int efx_ef10_vport_add_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac); + unsigned int port_id, const u8 *mac); int efx_ef10_vport_del_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac); + unsigned int port_id, const u8 *mac); int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id); int efx_ef10_vadaptor_query(struct efx_nic *efx, unsigned int port_id, u32 *port_flags, u32 *vadaptor_flags, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index f6981810039d..cc15ee8812d9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1440,7 +1440,7 @@ struct efx_nic_type { bool (*sriov_wanted)(struct efx_nic *efx); void (*sriov_reset)(struct efx_nic *efx); void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i); - int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, u8 *mac); + int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, const u8 *mac); int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan, u8 qos); int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i, diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 83dcfcae3d4b..e9095cf06368 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1591,7 +1591,7 @@ void efx_fini_sriov(void) destroy_workqueue(vfdi_workqueue); } -int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac) { struct siena_nic_data *nic_data = efx->nic_data; struct siena_vf *vf; diff --git a/drivers/net/ethernet/sfc/siena_sriov.h b/drivers/net/ethernet/sfc/siena_sriov.h index e441c89c25ce..e548c4daf189 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.h +++ b/drivers/net/ethernet/sfc/siena_sriov.h @@ -46,7 +46,7 @@ bool efx_siena_sriov_wanted(struct efx_nic *efx); void efx_siena_sriov_reset(struct efx_nic *efx); void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr); -int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac); +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac); int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf, u16 vlan, u8 qos); int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf, diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 60a0c0e9ded2..d105779ba3b2 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1098,7 +1098,7 @@ sis900_init_rxfilter (struct net_device * net_dev) /* load MAC addr to filter data register */ for (i = 0 ; i < 3 ; i++) { - u32 w = (u32) *((u16 *)(net_dev->dev_addr)+i); + u32 w = (u32) *((const u16 *)(net_dev->dev_addr)+i); sw32(rfcr, i << RFADDR_shift); sw32(rfdr, w); diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index fa387510c189..73bcc6f2bb6e 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1503,7 +1503,7 @@ static int smsc911x_soft_reset(struct smsc911x_data *pdata) /* Sets the device MAC address to dev_addr, called with mac_lock held */ static void -smsc911x_set_hw_mac_address(struct smsc911x_data *pdata, u8 dev_addr[6]) +smsc911x_set_hw_mac_address(struct smsc911x_data *pdata, const u8 dev_addr[6]) { u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4]; u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 3d1176588f7d..d207c0b463ab 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -404,7 +404,7 @@ static const struct ethtool_ops smsc9420_ethtool_ops = { static void smsc9420_set_mac_address(struct net_device *dev) { struct smsc9420_pdata *pd = netdev_priv(dev); - u8 *dev_addr = dev->dev_addr; + const u8 *dev_addr = dev->dev_addr; u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4]; u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | (dev_addr[1] << 8) | dev_addr[0]; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b6d945ea903d..9160f9ed363a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -546,13 +546,13 @@ int dwmac4_setup(struct stmmac_priv *priv); int dwxgmac2_setup(struct stmmac_priv *priv); int dwxlgmac2_setup(struct stmmac_priv *priv); -void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], +void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low); void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low); void stmmac_set_mac(void __iomem *ioaddr, bool enable); -void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, u8 addr[6], +void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low); void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 4422baeed3d8..617d0e4c6495 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -634,7 +634,7 @@ static void sun8i_dwmac_set_mac(void __iomem *ioaddr, bool enable) * If addr is NULL, clear the slot */ static void sun8i_dwmac_set_umac_addr(struct mac_device_info *hw, - unsigned char *addr, + const unsigned char *addr, unsigned int reg_n) { void __iomem *ioaddr = hw->pcsr; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index fc8759f146c7..76edb9b72675 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -104,7 +104,7 @@ static void dwmac1000_dump_regs(struct mac_device_info *hw, u32 *reg_space) } static void dwmac1000_set_umac_addr(struct mac_device_info *hw, - unsigned char *addr, + const unsigned char *addr, unsigned int reg_n) { void __iomem *ioaddr = hw->pcsr; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index ebcad8dd99db..75071a7d551a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -68,7 +68,7 @@ static int dwmac100_irq_status(struct mac_device_info *hw, } static void dwmac100_set_umac_addr(struct mac_device_info *hw, - unsigned char *addr, + const unsigned char *addr, unsigned int reg_n) { void __iomem *ioaddr = hw->pcsr; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index b21745368983..fd41db65fe1d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -322,7 +322,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) } static void dwmac4_set_umac_addr(struct mac_device_info *hw, - unsigned char *addr, unsigned int reg_n) + const unsigned char *addr, unsigned int reg_n) { void __iomem *ioaddr = hw->pcsr; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 9292a1fab7d3..d1c605777985 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -187,7 +187,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, return ret; } -void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, u8 addr[6], +void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low) { unsigned long data; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index d1c31200bb91..caa4bfc4c1d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -239,7 +239,7 @@ void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF)); } -void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], +void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low) { unsigned long data; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index c4d78fa93663..c6c4d7948fe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -335,7 +335,8 @@ static void dwxgmac2_pmt(struct mac_device_info *hw, unsigned long mode) } static void dwxgmac2_set_umac_addr(struct mac_device_info *hw, - unsigned char *addr, unsigned int reg_n) + const unsigned char *addr, + unsigned int reg_n) { void __iomem *ioaddr = hw->pcsr; u32 value; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 6dc1c98ebec8..6cf2c2107197 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -330,7 +330,8 @@ struct stmmac_ops { /* Set power management mode (e.g. magic frame) */ void (*pmt)(struct mac_device_info *hw, unsigned long mode); /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr, + void (*set_umac_addr)(struct mac_device_info *hw, + const unsigned char *addr, unsigned int reg_n); void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr, unsigned int reg_n); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index e649a3e6a529..be3cb63675a5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -36,7 +36,7 @@ struct stmmac_packet_attrs { int vlan_id_in; int vlan_id_out; unsigned char *src; - unsigned char *dst; + const unsigned char *dst; u32 ip_src; u32 ip_dst; int tcp; @@ -249,8 +249,8 @@ static int stmmac_test_loopback_validate(struct sk_buff *skb, struct net_device *orig_ndev) { struct stmmac_test_priv *tpriv = pt->af_packet_priv; + const unsigned char *dst = tpriv->packet->dst; unsigned char *src = tpriv->packet->src; - unsigned char *dst = tpriv->packet->dst; struct stmmachdr *shdr; struct ethhdr *ehdr; struct udphdr *uhdr; diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index c646575e79d5..d70426670c37 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -623,7 +623,7 @@ static int bigmac_init_hw(struct bigmac *bp, bool non_blocking) void __iomem *cregs = bp->creg; void __iomem *bregs = bp->bregs; __u32 bblk_dvma = (__u32)bp->bblock_dvma; - unsigned char *e = &bp->dev->dev_addr[0]; + const unsigned char *e = &bp->dev->dev_addr[0]; /* Latch current counters into statistics. */ bigmac_get_counters(bp, bregs); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 52b1053a0a77..efe0d33f6024 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -144,7 +144,7 @@ static int qe_init(struct sunqe *qep, int from_irq) void __iomem *cregs = qep->qcregs; void __iomem *mregs = qep->mregs; void __iomem *gregs = qecp->gregs; - unsigned char *e = &qep->dev->dev_addr[0]; + const unsigned char *e = &qep->dev->dev_addr[0]; __u32 qblk_dvma = (__u32)qep->qblock_dvma; u32 tmp; int i; diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c index bf6c1c6779ff..76eb7db80f13 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c @@ -57,7 +57,7 @@ static int xlgmac_enable_rx_csum(struct xlgmac_pdata *pdata) return 0; } -static int xlgmac_set_mac_address(struct xlgmac_pdata *pdata, u8 *addr) +static int xlgmac_set_mac_address(struct xlgmac_pdata *pdata, const u8 *addr) { unsigned int mac_addr_hi, mac_addr_lo; diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac.h b/drivers/net/ethernet/synopsys/dwc-xlgmac.h index 8598aaf3ec99..98e3a271e017 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac.h +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac.h @@ -410,7 +410,7 @@ struct xlgmac_hw_ops { void (*dev_xmit)(struct xlgmac_channel *channel); int (*dev_read)(struct xlgmac_channel *channel); - int (*set_mac_address)(struct xlgmac_pdata *pdata, u8 *addr); + int (*set_mac_address)(struct xlgmac_pdata *pdata, const u8 *addr); int (*config_rx_mode)(struct xlgmac_pdata *pdata); int (*enable_rx_csum)(struct xlgmac_pdata *pdata); int (*disable_rx_csum)(struct xlgmac_pdata *pdata); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 77c448ad67ce..eab7d78d7c72 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -184,7 +184,7 @@ static void tlan_print_list(struct tlan_list *, char *, int); static void tlan_read_and_clear_stats(struct net_device *, int); static void tlan_reset_adapter(struct net_device *); static void tlan_finish_reset(struct net_device *); -static void tlan_set_mac(struct net_device *, int areg, char *mac); +static void tlan_set_mac(struct net_device *, int areg, const char *mac); static void __tlan_phy_print(struct net_device *); static void tlan_phy_print(struct net_device *); @@ -2346,7 +2346,7 @@ tlan_finish_reset(struct net_device *dev) * **************************************************************/ -static void tlan_set_mac(struct net_device *dev, int areg, char *mac) +static void tlan_set_mac(struct net_device *dev, int areg, const char *mac) { int i; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 93453e5713b2..f8b9d10dc056 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1859,7 +1859,8 @@ static struct net_device_stats *tc35815_get_stats(struct net_device *dev) return &dev->stats; } -static void tc35815_set_cam_entry(struct net_device *dev, int index, unsigned char *addr) +static void tc35815_set_cam_entry(struct net_device *dev, int index, + const unsigned char *addr) { struct tc35815_local *lp = netdev_priv(dev); struct tc35815_regs __iomem *tr = diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index b95aee8607a4..0815de581c7f 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -206,12 +206,13 @@ static void xemaclite_disable_interrupts(struct net_local *drvdata) * This function writes data from a 16-bit aligned buffer to a 32-bit aligned * address in the EmacLite device. */ -static void xemaclite_aligned_write(void *src_ptr, u32 *dest_ptr, +static void xemaclite_aligned_write(const void *src_ptr, u32 *dest_ptr, unsigned length) { + const u16 *from_u16_ptr; u32 align_buffer; u32 *to_u32_ptr; - u16 *from_u16_ptr, *to_u16_ptr; + u16 *to_u16_ptr; to_u32_ptr = dest_ptr; from_u16_ptr = src_ptr; @@ -470,7 +471,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) * buffers (if configured). */ static void xemaclite_update_address(struct net_local *drvdata, - u8 *address_ptr) + const u8 *address_ptr) { void __iomem *addr; u32 reg_data; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index ae611e46da6a..ab513dcc3b22 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1271,7 +1271,7 @@ struct set_address_info { unsigned int ioaddr; }; -static void set_address(struct set_address_info *sa_info, char *addr) +static void set_address(struct set_address_info *sa_info, const char *addr) { unsigned int ioaddr = sa_info->ioaddr; int i; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 6e32da28e138..ebfeeb3c67c1 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -273,12 +273,12 @@ static int vsc85xx_downshift_set(struct phy_device *phydev, u8 count) static int vsc85xx_wol_set(struct phy_device *phydev, struct ethtool_wolinfo *wol) { + const u8 *mac_addr = phydev->attached_dev->dev_addr; int rc; u16 reg_val; u8 i; u16 pwd[3] = {0, 0, 0}; struct ethtool_wolinfo *wol_conf = wol; - u8 *mac_addr = phydev->attached_dev->dev_addr; mutex_lock(&phydev->lock); rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2); diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 981ac1c33780..ea06d10e1c21 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -119,7 +119,7 @@ static int aqc111_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, } static int aqc111_write_cmd(struct usbnet *dev, u8 cmd, u16 value, - u16 index, u16 size, void *data) + u16 index, u16 size, const void *data) { int ret; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 5ed59d9dd631..ea8aa8c33241 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -209,7 +209,7 @@ static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, } static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) + u16 size, const void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); @@ -272,7 +272,7 @@ static int ax88179_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, } static int ax88179_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, - u16 index, u16 size, void *data) + u16 index, u16 size, const void *data) { int ret; @@ -313,7 +313,7 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, } static int ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data) + u16 size, const void *data) { int ret; @@ -463,7 +463,7 @@ static int ax88179_auto_detach(struct usbnet *dev, int in_pm) u16 tmp16; u8 tmp8; int (*fnr)(struct usbnet *, u8, u16, u16, u16, void *); - int (*fnw)(struct usbnet *, u8, u16, u16, u16, void *); + int (*fnw)(struct usbnet *, u8, u16, u16, u16, const void *); if (!in_pm) { fnr = ax88179_read_cmd; diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 97ba67042d12..24db5768a3c0 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -615,7 +615,7 @@ static void catc_stats_timer(struct timer_list *t) * Receive modes. Broadcast, Multicast, Promisc. */ -static void catc_multicast(unsigned char *addr, u8 *multicast) +static void catc_multicast(const unsigned char *addr, u8 *multicast) { u32 crc; diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index dcdb46314685..48d7d278631e 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -93,7 +93,8 @@ static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value) value, reg, NULL, 0); } -static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, void *data) +static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, + const void *data) { usbnet_write_cmd_async(dev, DM_WRITE_REGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index cead742da381..5f42db26d200 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -132,7 +132,8 @@ static int mcs7830_hif_get_mac_address(struct usbnet *dev, unsigned char *addr) return 0; } -static int mcs7830_hif_set_mac_address(struct usbnet *dev, unsigned char *addr) +static int mcs7830_hif_set_mac_address(struct usbnet *dev, + const unsigned char *addr) { int ret = mcs7830_set_reg(dev, HIF_REG_ETHERNET_ADDR, ETH_ALEN, addr); diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 068f197f1786..15209de1849e 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -56,7 +56,8 @@ static int sr_write_reg(struct usbnet *dev, u8 reg, u8 value) value, reg, NULL, 0); } -static void sr_write_async(struct usbnet *dev, u8 reg, u16 length, void *data) +static void sr_write_async(struct usbnet *dev, u8 reg, u16 length, + const void *data) { usbnet_write_cmd_async(dev, SR_WR_REGS, SR_REQ_WR_REG, 0, reg, data, length); diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4df0bf0a0864..e1bf3219b4e6 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -331,7 +331,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, enum qed_filter_config_mode mode); int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); - int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac); + int (*req_bulletin_update_mac)(struct qed_dev *cdev, const u8 *mac); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ad220d5da18f..0dae7fcc5ef2 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1113,7 +1113,7 @@ struct qed_common_ops { * * Return: Int. */ - int (*update_mac)(struct qed_dev *cdev, u8 *mac); + int (*update_mac)(struct qed_dev *cdev, const u8 *mac); /** * update_mtu(): API to inform the change in the mtu. diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index aeb242cefebf..3b76c07fbcf8 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -662,7 +662,8 @@ struct qed_rdma_ops { u8 connection_handle, struct qed_ll2_stats *p_stats); int (*ll2_set_mac_filter)(struct qed_dev *cdev, - u8 *old_mac_address, u8 *new_mac_address); + u8 *old_mac_address, + const u8 *new_mac_address); int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset); From 54f2d8d6ca99a3e24efc9c8b055bbcbe0b583227 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:30 -0700 Subject: [PATCH 166/440] ethernet: make eth_hw_addr_random() use dev_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 3cf546d2ffd1..76f7ff684cbf 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -269,8 +269,11 @@ static inline void eth_zero_addr(u8 *addr) */ static inline void eth_hw_addr_random(struct net_device *dev) { + u8 addr[ETH_ALEN]; + + eth_random_addr(addr); + __dev_addr_set(dev, addr, ETH_ALEN); dev->addr_assign_type = NET_ADDR_RANDOM; - eth_random_addr(dev->dev_addr); } /** From db0dcc6a8a7c9051f9228c0f745f1e1d029ae4fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:31 -0700 Subject: [PATCH 167/440] ethernet: make use of eth_hw_addr_random() where appropriate Number of drivers use eth_random_addr(netdev->dev_addr) thus writing to netdev->dev_addr directly, and not setting the address type. Switch them to eth_hw_addr_random(). @@ expression netdev; @@ - eth_random_addr(netdev->dev_addr); + eth_hw_addr_random(netdev); Reviewed-by: Linus Walleij Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/ag71xx.c | 2 +- drivers/net/ethernet/cortina/gemini.c | 2 +- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- drivers/net/ethernet/ti/netcp_core.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 1aaaf8a4e7c5..ada3a9f0c8c8 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1971,7 +1971,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_ethdev_address(np, ndev); if (err) { netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); - eth_random_addr(ndev->dev_addr); + eth_hw_addr_random(ndev); } err = of_get_phy_mode(np, &ag->phy_if_mode); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 82d32caf1374..941f175fb911 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2473,7 +2473,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) port->mac_addr[0], port->mac_addr[1], port->mac_addr[2]); dev_info(dev, "using a random ethernet address\n"); - eth_random_addr(netdev->dev_addr); + eth_hw_addr_random(netdev); } gmac_write_mac_address(netdev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 37b605fed32c..c84ef494bd60 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -998,7 +998,7 @@ static int hip04_mac_probe(struct platform_device *pdev) hip04_config_port(ndev, SPEED_100, DUPLEX_FULL); hip04_config_fifo(priv); - eth_random_addr(ndev->dev_addr); + eth_hw_addr_random(ndev); hip04_update_mac_address(ndev); ret = hip04_alloc_ring(ndev, d); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 13d8eb43a485..1b2119b1d48a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -224,7 +224,7 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev) rmnet_dev->netdev_ops = &rmnet_vnd_ops; rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE; rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM; - eth_random_addr(rmnet_dev->dev_addr); + eth_hw_addr_random(rmnet_dev); rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN; /* Raw IP mode */ diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index b666e1b53c5f..b818e4579f6f 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2030,14 +2030,14 @@ static int netcp_create_interface(struct netcp_device *netcp_device, if (is_valid_ether_addr(efuse_mac_addr)) eth_hw_addr_set(ndev, efuse_mac_addr); else - eth_random_addr(ndev->dev_addr); + eth_hw_addr_random(ndev); devm_iounmap(dev, efuse); devm_release_mem_region(dev, res.start, size); } else { ret = of_get_ethdev_address(node_interface, ndev); if (ret) - eth_random_addr(ndev->dev_addr); + eth_hw_addr_random(ndev); } ret = of_property_read_string(node_interface, "rx-channel", From c51e5062c18067ccdc85798c2051edfc37e2475d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:32 -0700 Subject: [PATCH 168/440] ethernet: manually convert memcpy(dev_addr,..., sizeof(addr)) A handful of drivers use sizeof(addr) for the size of the address, after manually confirming the size is indeed 6 convert them to eth_hw_addr_set(). Acked-by: Florian Fainelli Acked-by: Petko Manolov Acked-by: Nicolas Ferre Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- drivers/net/ethernet/dnet.c | 2 +- drivers/net/ethernet/pasemi/pasemi_mac.c | 2 +- drivers/net/ethernet/ti/cpmac.c | 2 +- drivers/net/usb/pegasus.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 683f14665c2c..029dea2873e3 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -313,7 +313,7 @@ static void macb_get_hwaddr(struct macb *bp) addr[5] = (top >> 8) & 0xff; if (is_valid_ether_addr(addr)) { - memcpy(bp->dev->dev_addr, addr, sizeof(addr)); + eth_hw_addr_set(bp->dev, addr); return; } } diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 3ed21ba4eb99..92462ed87bc4 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -93,7 +93,7 @@ static void dnet_get_hwaddr(struct dnet *bp) *((__be16 *)(addr + 4)) = cpu_to_be16(tmp); if (is_valid_ether_addr(addr)) - memcpy(bp->dev->dev_addr, addr, sizeof(addr)); + eth_hw_addr_set(bp->dev, addr); } static int dnet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 5512e43bafc1..f0ace3a0e85c 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1722,7 +1722,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENODEV; goto out; } - memcpy(dev->dev_addr, mac->mac_addr, sizeof(mac->mac_addr)); + eth_hw_addr_set(dev, mac->mac_addr); ret = mac_to_intf(mac); if (ret < 0) { diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 02d4e51f7306..7449436fc87c 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1112,7 +1112,7 @@ static int cpmac_probe(struct platform_device *pdev) priv->dev = dev; priv->ring_size = 64; priv->msg_enable = netif_msg_init(debug_level, 0xff); - memcpy(dev->dev_addr, pdata->dev_addr, sizeof(pdata->dev_addr)); + eth_hw_addr_set(dev, pdata->dev_addr); snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 6a92a3fef75e..c4cd40b090fd 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -357,7 +357,7 @@ static void set_ethernet_addr(pegasus_t *pegasus) goto err; } - memcpy(pegasus->net->dev_addr, node_id, sizeof(node_id)); + eth_hw_addr_set(pegasus->net, node_id); return; err: From 68a064028e4e85b7a9dbfe58fc43b34060c11d39 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:33 -0700 Subject: [PATCH 169/440] ethernet: ibm/emac: use of_get_ethdev_address() to load dev_addr A straggler I somehow missed in the automated conversion in commit 9ca01b25dfff ("ethernet: use of_get_ethdev_address()"). Use the new helper instead of using netdev->dev_addr directly. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/emac/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 8db0ec38bbee..6b3fc8823c54 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2975,7 +2975,7 @@ static int emac_init_config(struct emac_instance *dev) } /* Read MAC-address */ - err = of_get_mac_address(np, dev->ndev->dev_addr); + err = of_get_ethdev_address(np, dev->ndev); if (err) { if (err != -EPROBE_DEFER) dev_err(&dev->ofdev->dev, "Can't get valid [local-]mac-address from OF !\n"); From 562ef98a666eef8e32a6057af37a577163ab6946 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:34 -0700 Subject: [PATCH 170/440] ethernet: replace netdev->dev_addr assignment loops A handful of drivers contains loops assigning the mac addr byte by byte. Convert those to eth_hw_addr_set(). Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/sun3lance.c | 4 +--- drivers/net/ethernet/amd/sunlance.c | 4 +--- drivers/net/ethernet/apple/bmac.c | 7 ++----- drivers/net/ethernet/dlink/dl2k.c | 3 +-- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 7 ++----- drivers/net/ethernet/i825xx/sun3_82586.c | 5 ++--- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 4 +--- drivers/net/ethernet/smsc/smc91c92_cs.c | 5 ++--- drivers/net/ethernet/sun/sunbmac.c | 4 +--- drivers/net/ethernet/ti/davinci_emac.c | 4 +--- drivers/pcmcia/pcmcia_cis.c | 5 ++--- net/atm/lec.c | 3 +-- 12 files changed, 17 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index 4a845bc071b2..007bd7787291 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -305,7 +305,6 @@ static int __init lance_probe( struct net_device *dev) unsigned long ioaddr; struct lance_private *lp; - int i; static int did_version; volatile unsigned short *ioaddr_probe; unsigned short tmp1, tmp2; @@ -373,8 +372,7 @@ static int __init lance_probe( struct net_device *dev) dev->irq); /* copy in the ethernet address from the prom */ - for(i = 0; i < 6 ; i++) - dev->dev_addr[i] = idprom->id_ethaddr[i]; + eth_hw_addr_set(dev, idprom->id_ethaddr); /* tell the card it's ether address, bytes swapped */ MEM->init.hwaddr[0] = dev->dev_addr[1]; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index ddece276ae23..22d609563af8 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1301,7 +1301,6 @@ static int sparc_lance_probe_one(struct platform_device *op, struct device_node *dp = op->dev.of_node; struct lance_private *lp; struct net_device *dev; - int i; dev = alloc_etherdev(sizeof(struct lance_private) + 8); if (!dev) @@ -1315,8 +1314,7 @@ static int sparc_lance_probe_one(struct platform_device *op, * will copy the address in the device structure to the lance * initialization block. */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = idprom->id_ethaddr[i]; + eth_hw_addr_set(dev, idprom->id_ethaddr); /* Get the IO region */ lp->lregs = of_ioremap(&op->resource[0], 0, diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index a63ec2005af3..9a650d1c1bdd 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -521,17 +521,14 @@ static int bmac_resume(struct macio_dev *mdev) static int bmac_set_address(struct net_device *dev, void *addr) { struct bmac_data *bp = netdev_priv(dev); - unsigned char *p = addr; const unsigned short *pWord16; unsigned long flags; - int i; XXDEBUG(("bmac: enter set_address\n")); spin_lock_irqsave(&bp->lock, flags); - for (i = 0; i < 6; ++i) { - dev->dev_addr[i] = p[i]; - } + eth_hw_addr_set(dev, addr); + /* load up the hardware address */ pWord16 = (const unsigned short *)dev->dev_addr; bmwrite(dev, MADD0, *pWord16++); diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 993bba0ffb16..a301f7e6a440 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -349,8 +349,7 @@ parse_eeprom (struct net_device *dev) } /* Set MAC address */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = psrom->mac_addr[i]; + eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { np->led_mode = psrom->led_mode; diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 62c0bed82ced..62600153b964 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -468,8 +468,7 @@ static int fmvj18x_config(struct pcmcia_device *link) goto failed; } /* Read MACID from CIS */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = buf[i + 5]; + eth_hw_addr_set(dev, &buf[5]); kfree(buf); } else { if (pcmcia_get_mac_from_cis(link, dev)) @@ -499,9 +498,7 @@ static int fmvj18x_config(struct pcmcia_device *link) pr_notice("unable to read hardware net address\n"); goto failed; } - for (i = 0 ; i < 6; i++) { - dev->dev_addr[i] = buggybuf[i]; - } + eth_hw_addr_set(dev, buggybuf); card_name = "FMV-J182"; break; case MBH10302: diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 18d32302c3c7..3909c6a0af89 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -339,14 +339,13 @@ static const struct net_device_ops sun3_82586_netdev_ops = { static int __init sun3_82586_probe1(struct net_device *dev,int ioaddr) { - int i, size, retval; + int size, retval; if (!request_region(ioaddr, SUN3_82586_TOTAL_SIZE, DRV_NAME)) return -EBUSY; /* copy in the ethernet address from the prom */ - for(i = 0; i < 6 ; i++) - dev->dev_addr[i] = idprom->id_ethaddr[i]; + eth_hw_addr_set(dev, idprom->id_ethaddr); printk("%s: SUN3 Intel 82586 found at %lx, ",dev->name,dev->base_addr); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 5ae59b1e5b48..5736fcdafd7a 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3739,7 +3739,6 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev; struct myri10ge_priv *mgp; struct device *dev = &pdev->dev; - int i; int status = -ENXIO; int dac_enabled; unsigned hdr_offset, ss_offset; @@ -3829,8 +3828,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (status) goto abort_with_ioremap; - for (i = 0; i < ETH_ALEN; i++) - netdev->dev_addr[i] = mgp->mac_addr[i]; + eth_hw_addr_set(netdev, mgp->mac_addr); myri10ge_select_firmware(mgp); diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 42fc37c7887a..e5658aa39765 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -666,14 +666,13 @@ static int pcmcia_osi_mac(struct pcmcia_device *p_dev, void *priv) { struct net_device *dev = priv; - int i; if (tuple->TupleDataLen < 8) return -EINVAL; if (tuple->TupleData[0] != 0x04) return -EINVAL; - for (i = 0; i < 6; i++) - dev->dev_addr[i] = tuple->TupleData[i+2]; + + eth_hw_addr_set(dev, &tuple->TupleData[2]); return 0; }; diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index d70426670c37..531a6f449afa 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -1076,7 +1076,6 @@ static int bigmac_ether_init(struct platform_device *op, struct net_device *dev; u8 bsizes, bsizes_more; struct bigmac *bp; - int i; /* Get a new device struct for this interface. */ dev = alloc_etherdev(sizeof(struct bigmac)); @@ -1086,8 +1085,7 @@ static int bigmac_ether_init(struct platform_device *op, if (version_printed++ == 0) printk(KERN_INFO "%s", version); - for (i = 0; i < 6; i++) - dev->dev_addr[i] = idprom->id_ethaddr[i]; + eth_hw_addr_set(dev, idprom->id_ethaddr); /* Setup softc, with backpointers to QEC and BigMAC SBUS device structs. */ bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4538e597eefe..2d2dcf70563f 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1402,7 +1402,6 @@ static int match_first_device(struct device *dev, const void *data) static int emac_dev_open(struct net_device *ndev) { struct device *emac_dev = &ndev->dev; - u32 cnt; struct resource *res; int q, m, ret; int res_num = 0, irq_num = 0; @@ -1420,8 +1419,7 @@ static int emac_dev_open(struct net_device *ndev) } netif_carrier_off(ndev); - for (cnt = 0; cnt < ETH_ALEN; cnt++) - ndev->dev_addr[cnt] = priv->mac_addr[cnt]; + eth_hw_addr_set(ndev, priv->mac_addr); /* Configuration items */ priv->rx_buf_size = EMAC_DEF_MAX_FRAME_SIZE + NET_IP_ALIGN; diff --git a/drivers/pcmcia/pcmcia_cis.c b/drivers/pcmcia/pcmcia_cis.c index d2d0ed4b27c8..f650e19a315c 100644 --- a/drivers/pcmcia/pcmcia_cis.c +++ b/drivers/pcmcia/pcmcia_cis.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -398,7 +399,6 @@ static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple, void *priv) { struct net_device *dev = priv; - int i; if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) return -EINVAL; @@ -412,8 +412,7 @@ static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple, dev_warn(&p_dev->dev, "Invalid header for LAN_NODE_ID\n"); return -EINVAL; } - for (i = 0; i < 6; i++) - dev->dev_addr[i] = tuple->TupleData[i+2]; + eth_hw_addr_set(dev, &tuple->TupleData[2]); return 0; } diff --git a/net/atm/lec.c b/net/atm/lec.c index 7226c784dbe0..8eaea4a4bbd6 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -355,8 +355,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: - for (i = 0; i < 6; i++) - dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; + eth_hw_addr_set(dev, mesg->content.normal.mac_addr); break; case l_del_mac_addr: for (i = 0; i < 6; i++) From 923ca6f61887c9ed5797af096ffb23bdb6e4c6fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:44:35 -0700 Subject: [PATCH 171/440] ethernet: replace netdev->dev_addr 16bit writes Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. This patch takes care of drivers which cast netdev->dev_addr to a 16bit type, often with an explicit byte order. Acked-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c515.c | 5 +++-- drivers/net/ethernet/3com/3c574_cs.c | 11 +++++------ drivers/net/ethernet/3com/3c589_cs.c | 10 +++++----- drivers/net/ethernet/3com/3c59x.c | 4 +++- drivers/net/ethernet/dec/tulip/winbond-840.c | 4 +++- drivers/net/ethernet/dlink/sundance.c | 4 +++- drivers/net/ethernet/qlogic/qla3xxx.c | 10 ++++++---- drivers/net/ethernet/rdc/r6040.c | 12 ++++++------ drivers/net/ethernet/realtek/8139cp.c | 5 +++-- drivers/net/ethernet/realtek/8139too.c | 5 +++-- drivers/net/ethernet/realtek/atp.c | 4 +++- drivers/net/ethernet/sis/sis190.c | 4 +++- drivers/net/ethernet/sis/sis900.c | 13 +++++++++---- drivers/net/ethernet/smsc/epic100.c | 4 +++- 14 files changed, 58 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 6f0ea2facea9..1d124b0f65e7 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -567,6 +567,7 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, { struct corkscrew_private *vp = netdev_priv(dev); unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ + __be16 addr[ETH_ALEN / 2]; int i; int irq; @@ -619,7 +620,6 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, /* Read the station address from the EEPROM. */ EL3WINDOW(0); for (i = 0; i < 0x18; i++) { - __be16 *phys_addr = (__be16 *) dev->dev_addr; int timer; outw(EEPROM_Read + i, ioaddr + Wn0EepromCmd); /* Pause for at least 162 us. for the read to take place. */ @@ -631,8 +631,9 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, eeprom[i] = inw(ioaddr + Wn0EepromData); checksum ^= eeprom[i]; if (i < 3) - phys_addr[i] = htons(eeprom[i]); + addr[i] = htons(eeprom[i]); } + eth_hw_addr_set(dev, (u8 *)addr); checksum = (checksum ^ (checksum >> 8)) & 0xff; if (checksum != 0x00) pr_cont(" ***INVALID CHECKSUM %4.4x*** ", checksum); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index dd4d3c48b98d..dc3b7c960611 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -305,15 +305,13 @@ static int tc574_config(struct pcmcia_device *link) struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); int ret, i, j; + __be16 addr[ETH_ALEN / 2]; unsigned int ioaddr; - __be16 *phys_addr; char *cardname; __u32 config; u8 *buf; size_t len; - phys_addr = (__be16 *)dev->dev_addr; - dev_dbg(&link->dev, "3c574_config()\n"); link->io_lines = 16; @@ -347,19 +345,20 @@ static int tc574_config(struct pcmcia_device *link) len = pcmcia_get_tuple(link, 0x88, &buf); if (buf && len >= 6) { for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i * 2])); + addr[i] = htons(le16_to_cpu(buf[i * 2])); kfree(buf); } else { kfree(buf); /* 0 < len < 6 */ EL3WINDOW(0); for (i = 0; i < 3; i++) - phys_addr[i] = htons(read_eeprom(ioaddr, i + 10)); - if (phys_addr[0] == htons(0x6060)) { + addr[i] = htons(read_eeprom(ioaddr, i + 10)); + if (addr[0] == htons(0x6060)) { pr_notice("IO port conflict at 0x%03lx-0x%03lx\n", dev->base_addr, dev->base_addr+15); goto failed; } } + eth_hw_addr_set(dev, (u8 *)addr); if (link->prod_id[1]) cardname = link->prod_id[1]; else diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 09816e84314d..4673bc1604e7 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -237,8 +237,8 @@ static void tc589_detach(struct pcmcia_device *link) static int tc589_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - __be16 *phys_addr; int ret, i, j, multi = 0, fifo; + __be16 addr[ETH_ALEN / 2]; unsigned int ioaddr; static const char * const ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; u8 *buf; @@ -246,7 +246,6 @@ static int tc589_config(struct pcmcia_device *link) dev_dbg(&link->dev, "3c589_config\n"); - phys_addr = (__be16 *)dev->dev_addr; /* Is this a 3c562? */ if (link->manf_id != MANFID_3COM) dev_info(&link->dev, "hmmm, is this really a 3Com card??\n"); @@ -285,18 +284,19 @@ static int tc589_config(struct pcmcia_device *link) len = pcmcia_get_tuple(link, 0x88, &buf); if (buf && len >= 6) { for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i*2])); + addr[i] = htons(le16_to_cpu(buf[i*2])); kfree(buf); } else { kfree(buf); /* 0 < len < 6 */ for (i = 0; i < 3; i++) - phys_addr[i] = htons(read_eeprom(ioaddr, i)); - if (phys_addr[0] == htons(0x6060)) { + addr[i] = htons(read_eeprom(ioaddr, i)); + if (addr[0] == htons(0x6060)) { dev_err(&link->dev, "IO port conflict at 0x%03lx-0x%03lx\n", dev->base_addr, dev->base_addr+15); goto failed; } } + eth_hw_addr_set(dev, (u8 *)addr); /* The address and resource configuration register aren't loaded from * the EEPROM and *must* be set to 0 and IRQ3 for the PCMCIA version. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 7b0ae9efc004..ccf07667aa5e 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1091,6 +1091,7 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, struct vortex_private *vp; int option; unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ + __be16 addr[ETH_ALEN / 2]; int i, step; struct net_device *dev; static int printed_version; @@ -1284,7 +1285,8 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO)) pr_cont(" ***INVALID CHECKSUM %4.4x*** ", checksum); for (i = 0; i < 3; i++) - ((__be16 *)dev->dev_addr)[i] = htons(eeprom[i + 10]); + addr[i] = htons(eeprom[i + 10]); + eth_hw_addr_set(dev, (u8 *)addr); if (print_info) pr_cont(" %pM", dev->dev_addr); /* Unfortunately an all zero eeprom passes the checksum and this diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 85b99099c6b9..c4217ca5d709 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -355,6 +355,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) int chip_idx = ent->driver_data; int irq; int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; + __le16 addr[ETH_ALEN / 2]; void __iomem *ioaddr; i = pcim_enable_device(pdev); @@ -382,7 +383,8 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_netdev; for (i = 0; i < 3; i++) - ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i)); + addr[i] = cpu_to_le16(eeprom_read(ioaddr, i)); + eth_hw_addr_set(dev, (u8 *)addr); /* Reset the chip to erase previous misconfiguration. No hold time required! */ diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 194b0812f7f6..c710dc17be90 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -508,6 +508,7 @@ static int sundance_probe1(struct pci_dev *pdev, int bar = 1; #endif int phy, phy_end, phy_idx = 0; + __le16 addr[ETH_ALEN / 2]; if (pci_enable_device(pdev)) return -EIO; @@ -528,8 +529,9 @@ static int sundance_probe1(struct pci_dev *pdev, goto err_out_res; for (i = 0; i < 3; i++) - ((__le16 *)dev->dev_addr)[i] = + addr[i] = cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET)); + eth_hw_addr_set(dev, (u8 *)addr); np = netdev_priv(dev); np->ndev = dev; diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 395f054e9e0c..1e6d72adfe43 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -508,10 +508,12 @@ static void eeprom_readword(struct ql3_adapter *qdev, static void ql_set_mac_addr(struct net_device *ndev, u16 *addr) { - __le16 *p = (__le16 *)ndev->dev_addr; - p[0] = cpu_to_le16(addr[0]); - p[1] = cpu_to_le16(addr[1]); - p[2] = cpu_to_le16(addr[2]); + __le16 buf[ETH_ALEN / 2]; + + buf[0] = cpu_to_le16(addr[0]); + buf[1] = cpu_to_le16(addr[1]); + buf[2] = cpu_to_le16(addr[2]); + eth_hw_addr_set(ndev, (u8 *)buf); } static int ql_get_nvram_params(struct ql3_adapter *qdev) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index a8f282c43a78..a6bf7d505178 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1031,8 +1031,8 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) void __iomem *ioaddr; int err, io_size = R6040_IO_SIZE; static int card_idx = -1; + u16 addr[ETH_ALEN / 2]; int bar = 0; - u16 *adrp; pr_info("%s\n", version); @@ -1102,14 +1102,14 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Set MAC address */ card_idx++; - adrp = (u16 *)dev->dev_addr; - adrp[0] = ioread16(ioaddr + MID_0L); - adrp[1] = ioread16(ioaddr + MID_0M); - adrp[2] = ioread16(ioaddr + MID_0H); + addr[0] = ioread16(ioaddr + MID_0L); + addr[1] = ioread16(ioaddr + MID_0M); + addr[2] = ioread16(ioaddr + MID_0H); + eth_hw_addr_set(dev, (u8 *)addr); /* Some bootloader/BIOSes do not initialize * MAC address, warn about that */ - if (!(adrp[0] || adrp[1] || adrp[2])) { + if (!(addr[0] || addr[1] || addr[2])) { netdev_warn(dev, "MAC address not initialized, " "generating random\n"); eth_hw_addr_random(dev); diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 614aac780b6b..4f39f843bb3a 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1889,6 +1889,7 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) void __iomem *regs; resource_size_t pciaddr; unsigned int addr_len, i, pci_using_dac; + __le16 addr[ETH_ALEN / 2]; pr_info_once("%s", version); @@ -1979,8 +1980,8 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) /* read MAC address from EEPROM */ addr_len = read_eeprom (regs, 0, 8) == 0x8129 ? 8 : 6; for (i = 0; i < 3; i++) - ((__le16 *) (dev->dev_addr))[i] = - cpu_to_le16(read_eeprom (regs, i + 7, addr_len)); + addr[i] = cpu_to_le16(read_eeprom (regs, i + 7, addr_len)); + eth_hw_addr_set(dev, (u8 *)addr); dev->netdev_ops = &cp_netdev_ops; netif_napi_add(dev, &cp->napi, cp_rx_poll, 16); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 75b5ac91132b..15b40fd93cd2 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -945,6 +945,7 @@ static int rtl8139_init_one(struct pci_dev *pdev, { struct net_device *dev = NULL; struct rtl8139_private *tp; + __le16 addr[ETH_ALEN / 2]; int i, addr_len, option; void __iomem *ioaddr; static int board_idx = -1; @@ -994,8 +995,8 @@ static int rtl8139_init_one(struct pci_dev *pdev, addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6; for (i = 0; i < 3; i++) - ((__le16 *) (dev->dev_addr))[i] = - cpu_to_le16(read_eeprom (ioaddr, i + 7, addr_len)); + addr[i] = cpu_to_le16(read_eeprom (ioaddr, i + 7, addr_len)); + eth_hw_addr_set(dev, (u8 *)addr); /* The Rtl8139-specific entries in the device structure. */ dev->netdev_ops = &rtl8139_netdev_ops; diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index b6c849b258a0..6cbcb3164367 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -368,6 +368,7 @@ static int __init atp_probe1(long ioaddr) static void __init get_node_ID(struct net_device *dev) { long ioaddr = dev->base_addr; + __be16 addr[ETH_ALEN / 2]; int sa_offset = 0; int i; @@ -379,8 +380,9 @@ static void __init get_node_ID(struct net_device *dev) sa_offset = 15; for (i = 0; i < 3; i++) - ((__be16 *)dev->dev_addr)[i] = + addr[i] = cpu_to_be16(eeprom_op(ioaddr, EE_READ(sa_offset + i))); + eth_hw_addr_set(dev, (u8 *)addr); write_reg(ioaddr, CMR2, CMR2_NULL); } diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index be4d772e68cf..5e66e3f3aafc 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1586,6 +1586,7 @@ static int sis190_get_mac_addr_from_eeprom(struct pci_dev *pdev, { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; + __le16 addr[ETH_ALEN / 2]; u16 sig; int i; @@ -1606,8 +1607,9 @@ static int sis190_get_mac_addr_from_eeprom(struct pci_dev *pdev, for (i = 0; i < ETH_ALEN / 2; i++) { u16 w = sis190_read_eeprom(ioaddr, EEPROMMACAddr + i); - ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(w); + addr[i] = cpu_to_le16(w); } + eth_hw_addr_set(dev, (u8 *)addr); sis190_set_rgmii(tp, sis190_read_eeprom(ioaddr, EEPROMInfo)); diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index d105779ba3b2..3f5717a1874f 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -258,6 +258,7 @@ static int sis900_get_mac_addr(struct pci_dev *pci_dev, { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; + u16 addr[ETH_ALEN / 2]; u16 signature; int i; @@ -271,7 +272,8 @@ static int sis900_get_mac_addr(struct pci_dev *pci_dev, /* get MAC address from EEPROM */ for (i = 0; i < 3; i++) - ((u16 *)(net_dev->dev_addr))[i] = read_eeprom(ioaddr, i+EEPROMMACAddr); + addr[i] = read_eeprom(ioaddr, i+EEPROMMACAddr); + eth_hw_addr_set(net_dev, (u8 *)addr); return 1; } @@ -331,6 +333,7 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev, { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; + u16 addr[ETH_ALEN / 2]; u32 rfcrSave; u32 i; @@ -345,8 +348,9 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev, /* load MAC addr to filter data register */ for (i = 0 ; i < 3 ; i++) { sw32(rfcr, (i << RFADDR_shift)); - *( ((u16 *)net_dev->dev_addr) + i) = sr16(rfdr); + addr[i] = sr16(rfdr); } + eth_hw_addr_set(net_dev, (u8 *)addr); /* enable packet filtering */ sw32(rfcr, rfcrSave | RFEN); @@ -375,17 +379,18 @@ static int sis96x_get_mac_addr(struct pci_dev *pci_dev, { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; + u16 addr[ETH_ALEN / 2]; int wait, rc = 0; sw32(mear, EEREQ); for (wait = 0; wait < 2000; wait++) { if (sr32(mear) & EEGNT) { - u16 *mac = (u16 *)net_dev->dev_addr; int i; /* get MAC address from EEPROM */ for (i = 0; i < 3; i++) - mac[i] = read_eeprom(ioaddr, i + EEPROMMACAddr); + addr[i] = read_eeprom(ioaddr, i + EEPROMMACAddr); + eth_hw_addr_set(net_dev, (u8 *)addr); rc = 1; break; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 44daf79a8f97..a0654e88444c 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -325,6 +325,7 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *dev; struct epic_private *ep; int i, ret, option = 0, duplex = 0; + __le16 addr[ETH_ALEN / 2]; void *ring_space; dma_addr_t ring_dma; @@ -416,7 +417,8 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Note: the '175 does not have a serial EEPROM. */ for (i = 0; i < 3; i++) - ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(er16(LAN0 + i*4)); + addr[i] = cpu_to_le16(er16(LAN0 + i*4)); + eth_hw_addr_set(dev, (u8 *)addr); if (debug > 2) { dev_dbg(&pdev->dev, "EEPROM contents:\n"); From ba530fea8ca1b57ee71d4e62f287a5d7ed92f789 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Oct 2021 13:54:50 -0700 Subject: [PATCH 172/440] ethernet: remove random_ether_addr() random_ether_addr() was the original name of the helper which was kept for backward compatibility (?) after the rename in commit 0a4dd594982a ("etherdevice: Rename random_ether_addr to eth_random_addr"). We have a single random_ether_addr() caller left in tree while there are 70 callers of eth_random_addr(). Time to drop this define. Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20211013205450.328092-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- include/linux/etherdevice.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 6904bfaa5777..c092cb61416a 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1918,7 +1918,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->port_id, port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { - random_ether_addr(port->slave.mac_addr); + eth_random_addr(port->slave.mac_addr); dev_err(dev, "Use random MAC address\n"); } } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 76f7ff684cbf..23681c3d3b8a 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -234,8 +234,6 @@ static inline void eth_random_addr(u8 *addr) addr[0] |= 0x02; /* set local assignment bit (IEEE802) */ } -#define random_ether_addr(addr) eth_random_addr(addr) - /** * eth_broadcast_addr - Assign broadcast address * @addr: Pointer to a six-byte array containing the Ethernet address From 7463acfbe52ae8b7e0ea6890c1886b3f8ba8bddd Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 8 Oct 2021 22:06:01 +0200 Subject: [PATCH 173/440] netfilter: Rename ingress hook include file Prepare for addition of a netfilter egress hook by renaming to . The egress hook also necessitates a refactoring of the include file, but that is done in a separate commit to ease reviewing. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Pablo Neira Ayuso --- include/linux/{netfilter_ingress.h => netfilter_netdev.h} | 0 net/core/dev.c | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename include/linux/{netfilter_ingress.h => netfilter_netdev.h} (100%) diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_netdev.h similarity index 100% rename from include/linux/netfilter_ingress.h rename to include/linux/netfilter_netdev.h diff --git a/net/core/dev.c b/net/core/dev.c index 16ab09b6a7f8..0fd3c6490e06 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -140,7 +140,7 @@ #include #include #include -#include +#include #include #include #include From 17d20784223d52bf1671f984c9e8d5d9b8ea171b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 8 Oct 2021 22:06:02 +0200 Subject: [PATCH 174/440] netfilter: Generalize ingress hook include file Prepare for addition of a netfilter egress hook by generalizing the ingress hook include file. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 20 +++++++++++--------- net/core/dev.c | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index a13774be2eb5..5812b0fb0278 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ +#ifndef _NETFILTER_NETDEV_H_ +#define _NETFILTER_NETDEV_H_ #include #include @@ -38,10 +38,6 @@ static inline int nf_hook_ingress(struct sk_buff *skb) return ret; } -static inline void nf_hook_ingress_init(struct net_device *dev) -{ - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -} #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) { @@ -52,7 +48,13 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { return 0; } - -static inline void nf_hook_ingress_init(struct net_device *dev) {} #endif /* CONFIG_NETFILTER_INGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ + +static inline void nf_hook_netdev_init(struct net_device *dev) +{ +#ifdef CONFIG_NETFILTER_INGRESS + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +#endif +} + +#endif /* _NETFILTER_NETDEV_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index 0fd3c6490e06..e4c683029c61 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10867,7 +10867,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; - nf_hook_ingress_init(dev); + nf_hook_netdev_init(dev); return dev; From 42df6e1d221dddc0f2acf2be37e68d553ad65f96 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 8 Oct 2021 22:06:03 +0200 Subject: [PATCH 175/440] netfilter: Introduce egress hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support classifying packets with netfilter on egress to satisfy user requirements such as: * outbound security policies for containers (Laura) * filtering and mangling intra-node Direct Server Return (DSR) traffic on a load balancer (Laura) * filtering locally generated traffic coming in through AF_PACKET, such as local ARP traffic generated for clustering purposes or DHCP (Laura; the AF_PACKET plumbing is contained in a follow-up commit) * L2 filtering from ingress and egress for AVB (Audio Video Bridging) and gPTP with nftables (Pablo) * in the future: in-kernel NAT64/NAT46 (Pablo) The egress hook introduced herein complements the ingress hook added by commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key"). A patch for nftables to hook up egress rules from user space has been submitted separately, so users may immediately take advantage of the feature. Alternatively or in addition to netfilter, packets can be classified with traffic control (tc). On ingress, packets are classified first by tc, then by netfilter. On egress, the order is reversed for symmetry. Conceptually, tc and netfilter can be thought of as layers, with netfilter layered above tc. Traffic control is capable of redirecting packets to another interface (man 8 tc-mirred). E.g., an ingress packet may be redirected from the host namespace to a container via a veth connection: tc ingress (host) -> tc egress (veth host) -> tc ingress (veth container) In this case, netfilter egress classifying is not performed when leaving the host namespace! That's because the packet is still on the tc layer. If tc redirects the packet to a physical interface in the host namespace such that it leaves the system, the packet is never subjected to netfilter egress classifying. That is only logical since it hasn't passed through netfilter ingress classifying either. Packets can alternatively be redirected at the netfilter layer using nft fwd. Such a packet *is* subjected to netfilter egress classifying since it has reached the netfilter layer. Internally, the skb->nf_skip_egress flag controls whether netfilter is invoked on egress by __dev_queue_xmit(). Because __dev_queue_xmit() may be called recursively by tunnel drivers such as vxlan, the flag is reverted to false after sch_handle_egress(). This ensures that netfilter is applied both on the overlay and underlying network. Interaction between tc and netfilter is possible by setting and querying skb->mark. If netfilter egress classifying is not enabled on any interface, it is patched out of the data path by way of a static_key and doesn't make a performance difference that is discernible from noise: Before: 1537 1538 1538 1537 1538 1537 Mb/sec After: 1536 1534 1539 1539 1539 1540 Mb/sec Before + tc accept: 1418 1418 1418 1419 1419 1418 Mb/sec After + tc accept: 1419 1424 1418 1419 1422 1420 Mb/sec Before + tc drop: 1620 1619 1619 1619 1620 1620 Mb/sec After + tc drop: 1616 1624 1625 1624 1622 1619 Mb/sec When netfilter egress classifying is enabled on at least one interface, a minimal performance penalty is incurred for every egress packet, even if the interface it's transmitted over doesn't have any netfilter egress rules configured. That is caused by checking dev->nf_hooks_egress against NULL. Measurements were performed on a Core i7-3615QM. Commands to reproduce: ip link add dev foo type dummy ip link set dev foo up modprobe pktgen echo "add_device foo" > /proc/net/pktgen/kpktgend_3 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i foo -n 400000000 -m "11:11:11:11:11:11" -d 1.1.1.1 Accept all traffic with tc: tc qdisc add dev foo clsact tc filter add dev foo egress bpf da bytecode '1,6 0 0 0,' Drop all traffic with tc: tc qdisc add dev foo clsact tc filter add dev foo egress bpf da bytecode '1,6 0 0 2,' Apply this patch when measuring packet drops to avoid errors in dmesg: https://lore.kernel.org/netdev/a73dda33-57f4-95d8-ea51-ed483abd6a7a@iogearbox.net/ Signed-off-by: Lukas Wunner Cc: Laura García Liébana Cc: John Fastabend Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Eric Dumazet Cc: Thomas Graf Signed-off-by: Pablo Neira Ayuso --- drivers/net/ifb.c | 3 ++ include/linux/netdevice.h | 4 ++ include/linux/netfilter_netdev.h | 86 ++++++++++++++++++++++++++++++++ include/linux/skbuff.h | 4 ++ include/uapi/linux/netfilter.h | 1 + net/core/dev.c | 15 +++++- net/netfilter/Kconfig | 11 ++++ net/netfilter/core.c | 34 +++++++++++-- net/netfilter/nfnetlink_hook.c | 16 ++++-- net/netfilter/nft_chain_filter.c | 4 +- 10 files changed, 168 insertions(+), 10 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index e9258a9f3702..2c319dd27f29 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -75,8 +76,10 @@ static void ifb_ri_tasklet(struct tasklet_struct *t) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { + /* Skip tc and netfilter to prevent redirection loop. */ skb->redirected = 0; skb->tc_skip_classify = 1; + nf_skip_egress(skb, true); u64_stats_update_begin(&txp->tsync); txp->tx_packets++; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d79163208dfd..e9a48068f306 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2161,6 +2162,9 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 5812b0fb0278..b71b57a83bb4 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -50,11 +50,97 @@ static inline int nf_hook_ingress(struct sk_buff *skb) } #endif /* CONFIG_NETFILTER_INGRESS */ +#ifdef CONFIG_NETFILTER_EGRESS +static inline bool nf_hook_egress_active(void) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS])) + return false; +#endif + return true; +} + +/** + * nf_hook_egress - classify packets before transmission + * @skb: packet to be classified + * @rc: result code which shall be returned by __dev_queue_xmit() on failure + * @dev: netdev whose egress hooks shall be applied to @skb + * + * Returns @skb on success or %NULL if the packet was consumed or filtered. + * Caller must hold rcu_read_lock. + * + * On ingress, packets are classified first by tc, then by netfilter. + * On egress, the order is reversed for symmetry. Conceptually, tc and + * netfilter can be thought of as layers, with netfilter layered above tc: + * When tc redirects a packet to another interface, netfilter is not applied + * because the packet is on the tc layer. + * + * The nf_skip_egress flag controls whether netfilter is applied on egress. + * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the + * packet passes through tc and netfilter. Because __dev_queue_xmit() may be + * called recursively by tunnel drivers such as vxlan, the flag is reverted to + * false after sch_handle_egress(). This ensures that netfilter is applied + * both on the overlay and underlying network. + */ +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + struct nf_hook_entries *e; + struct nf_hook_state state; + int ret; + +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + if (skb->nf_skip_egress) + return skb; +#endif + + e = rcu_dereference(dev->nf_hooks_egress); + if (!e) + return skb; + + nf_hook_state_init(&state, NF_NETDEV_EGRESS, + NFPROTO_NETDEV, dev, NULL, NULL, + dev_net(dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + + if (ret == 1) { + return skb; + } else if (ret < 0) { + *rc = NET_XMIT_DROP; + return NULL; + } else { /* ret == 0 */ + *rc = NET_XMIT_SUCCESS; + return NULL; + } +} +#else /* CONFIG_NETFILTER_EGRESS */ +static inline bool nf_hook_egress_active(void) +{ + return false; +} + +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + return skb; +} +#endif /* CONFIG_NETFILTER_EGRESS */ + +static inline void nf_skip_egress(struct sk_buff *skb, bool skip) +{ +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + skb->nf_skip_egress = skip; +#endif +} + static inline void nf_hook_netdev_init(struct net_device *dev) { #ifdef CONFIG_NETFILTER_INGRESS RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); #endif +#ifdef CONFIG_NETFILTER_EGRESS + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); +#endif } #endif /* _NETFILTER_NETDEV_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 841e2f0f5240..cb96f1e6460c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t; * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path + * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -868,6 +869,9 @@ struct sk_buff { #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + __u8 nf_skip_egress:1; +#endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef9a44286e23..53411ccc69db 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,6 +51,7 @@ enum nf_inet_hooks { enum nf_dev_hooks { NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, NF_NETDEV_NUMHOOKS }; diff --git a/net/core/dev.c b/net/core/dev.c index e4c683029c61..09d74798b440 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3920,6 +3920,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { +#ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; @@ -3955,6 +3956,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) default: break; } +#endif /* CONFIG_NET_CLS_ACT */ return skb; } @@ -4148,13 +4150,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT skb->tc_at_ingress = 0; -# ifdef CONFIG_NET_EGRESS +#endif +#ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { + if (nf_hook_egress_active()) { + skb = nf_hook_egress(skb, &rc, dev); + if (!skb) + goto out; + } + nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; + nf_skip_egress(skb, false); } -# endif #endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. @@ -5296,6 +5305,7 @@ skip_taps: if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; + nf_skip_egress(skb, true); skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, &another); if (another) @@ -5303,6 +5313,7 @@ skip_taps: if (!skb) goto out; + nf_skip_egress(skb, false); if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 54395266339d..49c9fae9c62c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -10,6 +10,17 @@ config NETFILTER_INGRESS This allows you to classify packets from ingress using the Netfilter infrastructure. +config NETFILTER_EGRESS + bool "Netfilter egress support" + default y + select NET_EGRESS + help + This allows you to classify packets before transmission using the + Netfilter infrastructure. + +config NETFILTER_SKIP_EGRESS + def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB) + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 63d032191e62..3a32a813fcde 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -316,6 +316,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, if (dev && dev_net(dev) == net) return &dev->nf_hooks_ingress; } +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hooknum == NF_NETDEV_EGRESS) { + if (dev && dev_net(dev) == net) + return &dev->nf_hooks_egress; + } #endif WARN_ON_ONCE(1); return NULL; @@ -344,6 +350,11 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) return false; } +static inline bool nf_egress_hook(const struct nf_hook_ops *reg, int pf) +{ + return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; +} + static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) { #ifdef CONFIG_JUMP_LABEL @@ -383,9 +394,18 @@ static int __nf_register_net_hook(struct net *net, int pf, switch (pf) { case NFPROTO_NETDEV: - err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); - if (err < 0) - return err; +#ifndef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) + return -EOPNOTSUPP; +#endif +#ifndef CONFIG_NETFILTER_EGRESS + if (reg->hooknum == NF_NETDEV_EGRESS) + return -EOPNOTSUPP; +#endif + if ((reg->hooknum != NF_NETDEV_INGRESS && + reg->hooknum != NF_NETDEV_EGRESS) || + !reg->dev || dev_net(reg->dev) != net) + return -EINVAL; break; case NFPROTO_INET: if (reg->hooknum != NF_INET_INGRESS) @@ -417,6 +437,10 @@ static int __nf_register_net_hook(struct net *net, int pf, #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_inc_egress_queue(); #endif nf_static_key_inc(reg, pf); @@ -474,6 +498,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf, #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_dec_ingress_queue(); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_dec_egress_queue(); #endif nf_static_key_dec(reg, pf); } else { diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index f554e2ea32ee..d5c719c9e36c 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -185,7 +185,7 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) struct net_device *netdev; #endif @@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; #endif -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) case NFPROTO_NETDEV: - if (hook != NF_NETDEV_INGRESS) + if (hook >= NF_NETDEV_NUMHOOKS) return ERR_PTR(-EOPNOTSUPP); if (!dev) @@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de if (!netdev) return ERR_PTR(-ENODEV); - return rcu_dereference(netdev->nf_hooks_ingress); +#ifdef CONFIG_NETFILTER_INGRESS + if (hook == NF_NETDEV_INGRESS) + return rcu_dereference(netdev->nf_hooks_ingress); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hook == NF_NETDEV_EGRESS) + return rcu_dereference(netdev->nf_hooks_egress); +#endif + fallthrough; #endif default: return ERR_PTR(-EPROTONOSUPPORT); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..680fe557686e 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, - .hook_mask = (1 << NF_NETDEV_INGRESS), + .hook_mask = (1 << NF_NETDEV_INGRESS) | + (1 << NF_NETDEV_EGRESS), .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + [NF_NETDEV_EGRESS] = nft_do_chain_netdev, }, }; From 0d7308c0ff5fa7c77164ab8491a91e0589da66e3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Oct 2021 22:06:04 +0200 Subject: [PATCH 176/440] af_packet: Introduce egress hook Add egress hook for AF_PACKET sockets that have the PACKET_QDISC_BYPASS socket option set to on, which allows packets to escape without being filtered in the egress path. This patch only updates the AF_PACKET path, it does not update dev_direct_xmit() so the XDP infrastructure has a chance to bypass Netfilter. [lukas: acquire rcu_read_lock, fix typos, rebase] Signed-off-by: Lukas Wunner Signed-off-by: Pablo Neira Ayuso --- net/packet/af_packet.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2a2bc64f75cf..46943a18a10d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -91,6 +91,7 @@ #endif #include #include +#include #include "internal.h" @@ -241,8 +242,42 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +#ifdef CONFIG_NETFILTER_EGRESS +static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) +{ + struct sk_buff *next, *head = NULL, *tail; + int rc; + + rcu_read_lock(); + for (; skb != NULL; skb = next) { + next = skb->next; + skb_mark_not_on_list(skb); + + if (!nf_hook_egress(skb, &rc, skb->dev)) + continue; + + if (!head) + head = skb; + else + tail->next = skb; + + tail = skb; + } + rcu_read_unlock(); + + return head; +} +#endif + static int packet_direct_xmit(struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_hook_egress_active()) { + skb = nf_hook_direct_egress(skb); + if (!skb) + return NET_XMIT_DROP; + } +#endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } From 8844e01062ddd8196c4550df9803cc1835d123c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Oct 2021 17:15:10 +0200 Subject: [PATCH 177/440] netfilter: iptables: allow use of ipt_do_table as hookfn This is possible now that the xt_table structure is passed in via *priv. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 6 +++--- net/ipv4/netfilter/ip_tables.c | 7 ++++--- net/ipv4/netfilter/iptable_filter.c | 9 +-------- net/ipv4/netfilter/iptable_mangle.c | 8 ++++---- net/ipv4/netfilter/iptable_nat.c | 15 ++++----------- net/ipv4/netfilter/iptable_raw.c | 10 +--------- net/ipv4/netfilter/iptable_security.c | 9 +-------- 7 files changed, 18 insertions(+), 46 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 8d09bfe850dc..132b0e4a6d4d 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -63,9 +63,9 @@ struct ipt_error { } extern void *ipt_alloc_initial_table(const struct xt_table *); -extern unsigned int ipt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 13acb687c19a..2ed7c58b471a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -222,10 +222,11 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 0eb0e2ab9bfc..b9062f4552ac 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -28,13 +28,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; -static unsigned int -iptable_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -90,7 +83,7 @@ static int __init iptable_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); + filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 40417a3f930b..3abb430af9e6 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -34,7 +34,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) +ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; const struct iphdr *iph; @@ -50,7 +50,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pri daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, state, priv); + ret = ipt_do_table(priv, skb, state); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -75,8 +75,8 @@ iptable_mangle_hook(void *priv, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) - return ipt_mangle_out(skb, state, priv); - return ipt_do_table(skb, state, priv); + return ipt_mangle_out(priv, skb, state); + return ipt_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 45d7e072e6a5..56f6ecc43451 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -29,34 +29,27 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 8265c6765705..ca5e5b21587c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -32,14 +32,6 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -iptable_raw_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *rawtable_ops __read_mostly; static int iptable_raw_table_init(struct net *net) @@ -90,7 +82,7 @@ static int __init iptable_raw_init(void) if (ret < 0) return ret; - rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f519162a2fa5..d885443cb267 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -33,13 +33,6 @@ static const struct xt_table security_table = { .priority = NF_IP_PRI_SECURITY, }; -static unsigned int -iptable_security_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *sectbl_ops __read_mostly; static int iptable_security_table_init(struct net *net) @@ -78,7 +71,7 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); + sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); From e8d225b6002673366abc2e40e30c991bdc8d62ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Oct 2021 17:15:12 +0200 Subject: [PATCH 178/440] netfilter: arp_tables: allow use of arpt_do_table as hookfn This is possible now that the xt_table structure is passed in via *priv. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 5 ++--- net/ipv4/netfilter/arp_tables.c | 7 ++++--- net/ipv4/netfilter/arptable_filter.c | 10 +--------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 4f9a4b3c5892..a40aaf645fa4 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); void arpt_unregister_table_pre_exit(struct net *net, const char *name); -extern unsigned int arpt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c53f14b94356..ffc0cab7cf18 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -179,10 +179,11 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) return (void *)entry + entry->next_offset; } -unsigned int arpt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +unsigned int arpt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3de78416ec76..78cd5ee24448 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -26,14 +26,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; -/* The work comes in here from netfilter.c */ -static unsigned int -arptable_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return arpt_do_table(skb, state, priv); -} - static struct nf_hook_ops *arpfilter_ops __read_mostly; static int arptable_filter_table_init(struct net *net) @@ -72,7 +64,7 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); if (IS_ERR(arpfilter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(arpfilter_ops); From 44b5990e7b463240e4c116c9e8670c67dad960cc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Oct 2021 17:15:13 +0200 Subject: [PATCH 179/440] netfilter: ip6tables: allow use of ip6t_do_table as hookfn This is possible now that the xt_table structure is passed via *priv. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 5 ++--- net/ipv6/netfilter/ip6_tables.c | 6 +++--- net/ipv6/netfilter/ip6table_filter.c | 10 +--------- net/ipv6/netfilter/ip6table_mangle.c | 8 ++++---- net/ipv6/netfilter/ip6table_nat.c | 15 ++++----------- net/ipv6/netfilter/ip6table_raw.c | 10 +--------- net/ipv6/netfilter/ip6table_security.c | 9 +-------- 7 files changed, 16 insertions(+), 47 deletions(-) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 79e73fd7d965..8b8885a73c76 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); -extern unsigned int ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a579ea14a69b..2d816277f2c5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +ip6t_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 727ee8097012..df785ebda0ca 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -27,14 +27,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP6_PRI_FILTER, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 9b518ce37d6a..a88b2ce4a3cb 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) +ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, state, priv); + ret = ip6t_do_table(priv, skb, state); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, state, priv); - return ip6t_do_table(skb, state, priv); + return ip6t_mangle_out(priv, skb, state); + return ip6t_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 921c1723a01e..bf3cb3a13600 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4f2a04af71d3..08861d5d1f4d 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_raw_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *rawtable_ops __read_mostly; static int ip6table_raw_table_init(struct net *net) @@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void) return ret; /* Register hooks */ - rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 931674034d8b..4df14a9bae78 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -32,13 +32,6 @@ static const struct xt_table security_table = { .priority = NF_IP6_PRI_SECURITY, }; -static unsigned int -ip6table_security_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *sectbl_ops __read_mostly; static int ip6table_security_table_init(struct net *net) @@ -77,7 +70,7 @@ static int __init ip6table_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); From f0d6764f7ddbf6d57302155b0f83eadb25ab0f0c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Oct 2021 17:15:14 +0200 Subject: [PATCH 180/440] netfilter: ebtables: allow use of ebt_do_table as hookfn This is possible now that the xt_table structure is passed via *priv. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 ++--- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 13 +++---------- net/bridge/netfilter/ebtable_nat.c | 12 +++--------- net/bridge/netfilter/ebtables.c | 6 +++--- 5 files changed, 12 insertions(+), 26 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 10a01978bc0d..a13296d6c7ce 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -112,9 +112,8 @@ extern int ebt_register_table(struct net *net, const struct nf_hook_ops *ops); extern void ebt_unregister_table(struct net *net, const char *tablename); void ebt_unregister_table_pre_exit(struct net *net, const char *tablename); -extern unsigned int ebt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct ebt_table *table); +extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index a7af4eaff17d..1a11064f9990 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL); - ret = ebt_do_table(skb, &state, priv); + ret = ebt_do_table(priv, skb, &state); if (ret != NF_DROP) return ret; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index c0b121df4a9a..cb949436bc0e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -58,28 +58,21 @@ static const struct ebt_table frame_filter = { .me = THIS_MODULE, }; -static unsigned int -ebt_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ebt_do_table(skb, state, priv); -} - static const struct nf_hook_ops ebt_ops_filter[] = { { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4078151c224f..5ee0531ae506 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -58,27 +58,21 @@ static const struct ebt_table frame_nat = { .me = THIS_MODULE, }; -static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ebt_do_table(skb, state, priv); -} - static const struct nf_hook_ops ebt_ops_nat[] = { { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83d1798dfbb4..4a1508a1c566 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e) } /* Do some firewalling */ -unsigned int ebt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct ebt_table *table) +unsigned int ebt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { + struct ebt_table *table = priv; unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; From 9dd43a5f4b11b161c9dfcce9391e843e65d6a4cc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Oct 2021 19:29:56 +0200 Subject: [PATCH 181/440] netfilter: ipvs: prepare for hook function reduction ipvs has multiple one-line wrappers for hooks, compact them. To avoid a large patch make the two most common helpers use the same function signature as hooks. Next patches can then remove the oneline wrappers. Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 128690c512df..5a5deee3425c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1330,12 +1330,15 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + int af = state->pf; struct sock *sk; EnterFunction(11); @@ -1477,7 +1480,7 @@ static unsigned int ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); + return ip_vs_out_hook(priv, skb, state); } /* @@ -1488,7 +1491,7 @@ static unsigned int ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); + return ip_vs_out_hook(priv, skb, state); } #ifdef CONFIG_IP_VS_IPV6 @@ -1502,7 +1505,7 @@ static unsigned int ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); + return ip_vs_out_hook(priv, skb, state); } /* @@ -1513,7 +1516,7 @@ static unsigned int ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); + return ip_vs_out_hook(priv, skb, state); } #endif @@ -1957,8 +1960,10 @@ out: * and send it on its way... */ static unsigned int -ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; @@ -1966,6 +1971,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int int ret, pkts; int conn_reuse_mode; struct sock *sk; + int af = state->pf; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -2145,7 +2151,7 @@ static unsigned int ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); + return ip_vs_in_hook(priv, skb, state); } /* @@ -2156,7 +2162,7 @@ static unsigned int ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); + return ip_vs_in_hook(priv, skb, state); } #ifdef CONFIG_IP_VS_IPV6 @@ -2169,7 +2175,7 @@ static unsigned int ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); + return ip_vs_in_hook(priv, skb, state); } /* @@ -2180,7 +2186,7 @@ static unsigned int ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); + return ip_vs_in_hook(priv, skb, state); } #endif From 8a9941b42de5132eae0cd2c27d5da41024f278a2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Oct 2021 19:29:57 +0200 Subject: [PATCH 182/440] netfilter: ipvs: remove unneeded output wrappers After earlier patch we can use ip_vs_out_hook directly. Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 62 ++++----------------------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5a5deee3425c..2db033455204 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1471,56 +1471,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat return NF_ACCEPT; } -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out_hook(priv, skb, state); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out_hook(priv, skb, state); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out_hook(priv, skb, state); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out_hook(priv, skb, state); -} - -#endif - static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -2243,7 +2193,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -2259,7 +2209,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, @@ -2281,7 +2231,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -2292,7 +2242,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -2308,7 +2258,7 @@ static const struct nf_hook_ops ip_vs_ops6[] = { }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, @@ -2330,7 +2280,7 @@ static const struct nf_hook_ops ip_vs_ops6[] = { }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, From 540ff44b28f0b31d0c74b5e7082b050e1b14b36a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Oct 2021 19:29:58 +0200 Subject: [PATCH 183/440] netfilter: ipvs: remove unneeded input wrappers After earlier patch ip_vs_hook_in can be used directly. Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 57 +++------------------------------ 1 file changed, 4 insertions(+), 53 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2db033455204..ad5f5e50547f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2093,55 +2093,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state return ret; } -/* - * AF_INET handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in_hook(priv, skb, state); -} - -/* - * AF_INET handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in_hook(priv, skb, state); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * AF_INET6 handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in_hook(priv, skb, state); -} - -/* - * AF_INET6 handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in_hook(priv, skb, state); -} - -#endif - - /* * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. @@ -2202,7 +2153,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, @@ -2216,7 +2167,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -2251,7 +2202,7 @@ static const struct nf_hook_ops ip_vs_ops6[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, @@ -2265,7 +2216,7 @@ static const struct nf_hook_ops ip_vs_ops6[] = { }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, From c650c35a2506d8eebe8a4d8d263317fba29fe078 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Oct 2021 19:29:59 +0200 Subject: [PATCH 184/440] netfilter: ipvs: merge ipv4 + ipv6 icmp reply handlers Similar to earlier patches: allow ipv4 and ipv6 to use the same handler. ipv4 and ipv6 specific actions can be done by checking state->pf. v2: split the pf == NFPROTO_IPV4 check (Julian Anastasov) Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 45 +++++++++++++-------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index ad5f5e50547f..e93c937a8bf0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2106,41 +2106,32 @@ static unsigned int ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - int r; struct netns_ipvs *ipvs = net_ipvs(state->net); - - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) - return NF_ACCEPT; + int r; /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; + if (state->pf == NFPROTO_IPV4) { + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) + return NF_ACCEPT; +#ifdef CONFIG_IP_VS_IPV6 + } else { + struct ip_vs_iphdr iphdr; + + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); + + if (iphdr.protocol != IPPROTO_ICMPV6) + return NF_ACCEPT; + + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); +#endif + } + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } -#ifdef CONFIG_IP_VS_IPV6 -static unsigned int -ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - int r; - struct netns_ipvs *ipvs = net_ipvs(state->net); - struct ip_vs_iphdr iphdr; - - ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - if (iphdr.protocol != IPPROTO_ICMPV6) - return NF_ACCEPT; - - /* ipvs enabled in this netns ? */ - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) - return NF_ACCEPT; - - return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); -} -#endif - - static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { @@ -2224,7 +2215,7 @@ static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { - .hook = ip_vs_forward_icmp_v6, + .hook = ip_vs_forward_icmp, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, From 8b017fbe0bbb98dd71fb4850f6b9cc0e136a26b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Oct 2021 11:00:37 +0200 Subject: [PATCH 185/440] net: of: fix stub of_net helpers for CONFIG_NET=n Moving the of_net code from drivers/of/ to net/core means we no longer stub out the helpers when networking is disabled, which leads to a randconfig build failure with at least one ARM platform that calls this from non-networking code: arm-linux-gnueabi-ld: arch/arm/mach-mvebu/kirkwood.o: in function `kirkwood_dt_eth_fixup': kirkwood.c:(.init.text+0x54): undefined reference to `of_get_mac_address' Restore the way this worked before by changing that #ifdef check back to testing for both CONFIG_OF and CONFIG_NET. Fixes: e330fb14590c ("of: net: move of_net under net/") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211014090055.2058949-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/of_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 0797e2edb8c2..0484b613ca64 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -8,7 +8,7 @@ #include -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && defined(CONFIG_NET) #include struct net_device; From 67ca5159dbe2edb5dae7544447b8677d2596933a Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 12 Oct 2021 12:34:02 +0200 Subject: [PATCH 186/440] net: phy: micrel: make *-skew-ps check more lenient It seems reasonable to fine-tune only some of the skew values when using one of the rgmii-*id PHY modes, and even when all skew values are specified, using the correct ID PHY mode makes sense for documentation purposes. Such a configuration also appears in the binding docs in Documentation/devicetree/bindings/net/micrel-ksz90x1.txt, so the driver should not warn about it. Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20211012103402.21438-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b70f62efdbc3..ff452669130a 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -863,9 +863,9 @@ static int ksz9031_config_init(struct phy_device *phydev) MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, tx_data_skews, 4, &update); - if (update && phydev->interface != PHY_INTERFACE_MODE_RGMII) + if (update && !phy_interface_is_rgmii(phydev)) phydev_warn(phydev, - "*-skew-ps values should be used only with phy-mode = \"rgmii\"\n"); + "*-skew-ps values should be used only with RGMII PHY modes\n"); /* Silicon Errata Sheet (DS80000691D or DS80000692D): * When the device links in the 1000BASE-T slave mode only, From 4ece1ae440151a2e5af441d2c9b1d8eb3a700670 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Tue, 12 Oct 2021 09:49:53 -0400 Subject: [PATCH 187/440] net: microchip: lan743x: add support for PTP pulse width (duty cycle) If the PTP_PEROUT_DUTY_CYCLE flag is set, then check if the request_on value in ptp_perout_request matches the pre-defined values or a toggle option. Return a failure if the value is not supported. Preserve the old behaviors if the PTP_PEROUT_DUTY_CYCLE flag is not set. Tested with an oscilloscope on EVB-LAN7430: e.g., to output PPS 1sec period 500mS on (high) to GPIO 2. ./testptp -L 2,2 ./testptp -p 1000000000 -w 500000000 Signed-off-by: Yuiko Oshino Link: https://lore.kernel.org/r/1634046593-64312-1-git-send-email-yuiko.oshino@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.h | 1 + drivers/net/ethernet/microchip/lan743x_ptp.c | 91 ++++++++++++++++--- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 6080028c1df2..34c22eea0124 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -279,6 +279,7 @@ #define PTP_GENERAL_CONFIG_CLOCK_EVENT_1MS_ (3) #define PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_ (4) #define PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_ (5) +#define PTP_GENERAL_CONFIG_CLOCK_EVENT_TOGGLE_ (6) #define PTP_GENERAL_CONFIG_CLOCK_EVENT_X_SET_(channel, value) \ (((value) & 0x7) << (1 + ((channel) << 2))) #define PTP_GENERAL_CONFIG_RELOAD_ADD_X_(channel) (BIT((channel) << 2)) diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index ab6d719d40f0..9380e396f648 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -491,9 +491,10 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, int perout_pin = 0; unsigned int index = perout_request->index; struct lan743x_ptp_perout *perout = &ptp->perout[index]; + int ret = 0; /* Reject requests with unsupported flags */ - if (perout_request->flags) + if (perout_request->flags & ~PTP_PEROUT_DUTY_CYCLE) return -EOPNOTSUPP; if (on) { @@ -518,6 +519,7 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, netif_warn(adapter, drv, adapter->netdev, "Failed to reserve event channel %d for PEROUT\n", index); + ret = -EBUSY; goto failed; } @@ -529,6 +531,7 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, netif_warn(adapter, drv, adapter->netdev, "Failed to reserve gpio %d for PEROUT\n", perout_pin); + ret = -EBUSY; goto failed; } @@ -540,27 +543,93 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, period_sec += perout_request->period.nsec / 1000000000; period_nsec = perout_request->period.nsec % 1000000000; - if (period_sec == 0) { - if (period_nsec >= 400000000) { + if (perout_request->flags & PTP_PEROUT_DUTY_CYCLE) { + struct timespec64 ts_on, ts_period; + s64 wf_high, period64, half; + s32 reminder; + + ts_on.tv_sec = perout_request->on.sec; + ts_on.tv_nsec = perout_request->on.nsec; + wf_high = timespec64_to_ns(&ts_on); + ts_period.tv_sec = perout_request->period.sec; + ts_period.tv_nsec = perout_request->period.nsec; + period64 = timespec64_to_ns(&ts_period); + + if (period64 < 200) { + netif_warn(adapter, drv, adapter->netdev, + "perout period too small, minimum is 200nS\n"); + ret = -EOPNOTSUPP; + goto failed; + } + if (wf_high >= period64) { + netif_warn(adapter, drv, adapter->netdev, + "pulse width must be smaller than period\n"); + ret = -EINVAL; + goto failed; + } + + /* Check if we can do 50% toggle on an even value of period. + * If the period number is odd, then check if the requested + * pulse width is the same as one of pre-defined width values. + * Otherwise, return failure. + */ + half = div_s64_rem(period64, 2, &reminder); + if (!reminder) { + if (half == wf_high) { + /* It's 50% match. Use the toggle option */ + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_TOGGLE_; + /* In this case, devide period value by 2 */ + ts_period = ns_to_timespec64(div_s64(period64, 2)); + period_sec = ts_period.tv_sec; + period_nsec = ts_period.tv_nsec; + + goto program; + } + } + /* if we can't do toggle, then the width option needs to be the exact match */ + if (wf_high == 200000000) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; - } else if (period_nsec >= 20000000) { + } else if (wf_high == 10000000) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_; - } else if (period_nsec >= 2000000) { + } else if (wf_high == 1000000) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_1MS_; - } else if (period_nsec >= 200000) { + } else if (wf_high == 100000) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_100US_; - } else if (period_nsec >= 20000) { + } else if (wf_high == 10000) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_10US_; - } else if (period_nsec >= 200) { + } else if (wf_high == 100) { pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_100NS_; } else { netif_warn(adapter, drv, adapter->netdev, - "perout period too small, minimum is 200nS\n"); + "duty cycle specified is not supported\n"); + ret = -EOPNOTSUPP; goto failed; } } else { - pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; + if (period_sec == 0) { + if (period_nsec >= 400000000) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; + } else if (period_nsec >= 20000000) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_; + } else if (period_nsec >= 2000000) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_1MS_; + } else if (period_nsec >= 200000) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_100US_; + } else if (period_nsec >= 20000) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_10US_; + } else if (period_nsec >= 200) { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_100NS_; + } else { + netif_warn(adapter, drv, adapter->netdev, + "perout period too small, minimum is 200nS\n"); + ret = -EOPNOTSUPP; + goto failed; + } + } else { + pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; + } } +program: /* turn off by setting target far in future */ lan743x_csr_write(adapter, @@ -599,7 +668,7 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, failed: lan743x_ptp_perout_off(adapter, index); - return -ENODEV; + return ret; } static int lan743x_ptpci_enable(struct ptp_clock_info *ptpci, From 4dc08dcc9f6f58e881fd790d800e49abee8540e1 Mon Sep 17 00:00:00 2001 From: "Lay, Kuan Loon" Date: Wed, 13 Oct 2021 14:59:41 +0800 Subject: [PATCH 188/440] net: phy: dp83867: introduce critical chip default init for non-of platform PHY driver dp83867 has rich supports for OF-platform to fine-tune the PHY chip during phy configuration. However, for non-OF platform, certain PHY tunable parameters such as IO impedance and RX & TX internal delays are critical and should be initialized to its default during PHY driver probe. Tested-by: Clement Signed-off-by: Lay, Kuan Loon Co-developed-by: Ong Boon Leong Signed-off-by: Ong Boon Leong Tested-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20211013065941.2124858-1-boon.leong.ong@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 6bbc81ad295f..914619f3f0e3 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -619,6 +619,25 @@ static int dp83867_of_init(struct phy_device *phydev) #else static int dp83867_of_init(struct phy_device *phydev) { + struct dp83867_private *dp83867 = phydev->priv; + u16 delay; + + /* For non-OF device, the RX and TX ID values are either strapped + * or take from default value. So, we init RX & TX ID values here + * so that the RGMIIDCTL is configured correctly later in + * dp83867_config_init(); + */ + delay = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL); + dp83867->rx_id_delay = delay & DP83867_RGMII_RX_CLK_DELAY_MAX; + dp83867->tx_id_delay = (delay >> DP83867_RGMII_TX_CLK_DELAY_SHIFT) & + DP83867_RGMII_TX_CLK_DELAY_MAX; + + /* Per datasheet, IO impedance is default to 50-ohm, so we set the + * same here or else the default '0' means highest IO impedance + * which is wrong. + */ + dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN / 2; + return 0; } #endif /* CONFIG_OF_MDIO */ From 20d446f24f378ee63a29b259b375691d25002448 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 13 Oct 2021 09:49:21 +0300 Subject: [PATCH 189/440] net: mvneta: Delete unused variable The variable pp is not in use - delete it. Signed-off-by: Yuval Shaia Link: https://lore.kernel.org/r/20211013064921.26346-1-yshaia@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e2ce84ecb7a6..98f276c617fb 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1914,7 +1914,7 @@ static int mvneta_rx_refill(struct mvneta_port *pp, } /* Handle tx checksum */ -static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb) +static u32 mvneta_skb_tx_csum(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; @@ -2595,8 +2595,7 @@ err_drop_frame: } static inline void -mvneta_tso_put_hdr(struct sk_buff *skb, - struct mvneta_port *pp, struct mvneta_tx_queue *txq) +mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq) { int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; @@ -2604,7 +2603,7 @@ mvneta_tso_put_hdr(struct sk_buff *skb, tx_desc = mvneta_txq_next_desc_get(txq); tx_desc->data_size = hdr_len; - tx_desc->command = mvneta_skb_tx_csum(pp, skb); + tx_desc->command = mvneta_skb_tx_csum(skb); tx_desc->command |= MVNETA_TXD_F_DESC; tx_desc->buf_phys_addr = txq->tso_hdrs_phys + txq->txq_put_index * TSO_HEADER_SIZE; @@ -2681,7 +2680,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE; tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); - mvneta_tso_put_hdr(skb, pp, txq); + mvneta_tso_put_hdr(skb, txq); while (data_left > 0) { int size; @@ -2799,7 +2798,7 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev) /* Get a descriptor for the first part of the packet */ tx_desc = mvneta_txq_next_desc_get(txq); - tx_cmd = mvneta_skb_tx_csum(pp, skb); + tx_cmd = mvneta_skb_tx_csum(skb); tx_desc->data_size = skb_headlen(skb); From 507c2f1d2936e07aef83734983a36df01b458ef9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 Oct 2021 15:21:38 +0200 Subject: [PATCH 190/440] net, neigh: Add build-time assertion to avoid neigh->flags overflow Currently, NDA_FLAGS_EXT flags allow a maximum of 24 bits to be used for extended neighbor flags. These are eventually fed into neigh->flags by shifting with NTF_EXT_SHIFT as per commit 2c611ad97a82 ("net, neigh: Extend neigh->flags to 32 bit to allow for extensions"). If really ever needed in future, the full 32 bits from NDA_FLAGS_EXT can be used, it would only require to move neigh->flags from u32 to u64 inside the kernel. Add a build-time assertion such that when extending the NTF_EXT_MASK with new bits, we'll trigger an error once we surpass the 24th bit. This assumes that no bit holes in new NTF_EXT_* flags will slip in from UAPI, but I think this is reasonable to assume. Suggested-by: David Ahern Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eae73efa9245..4fc601f9cd06 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1940,6 +1940,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid extended flags"); goto out; } + BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE < + (sizeof(ndm->ndm_flags) * BITS_PER_BYTE + + hweight32(NTF_EXT_MASK))); ndm_flags |= (ext << NTF_EXT_SHIFT); } if (ndm->ndm_ifindex) { From c8e80c1169b2b3b91453f9ff3c34687c42233ef7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 Oct 2021 15:21:39 +0200 Subject: [PATCH 191/440] net, neigh: Use NLA_POLICY_MASK helper for NDA_FLAGS_EXT attribute Instead of open-coding a check for invalid bits in NTF_EXT_MASK, we can just use the NLA_POLICY_MASK() helper instead, and simplify NDA_FLAGS_EXT sanity check this way. Suggested-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4fc601f9cd06..922b9ed0fe76 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1834,7 +1834,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, - [NDA_FLAGS_EXT] = { .type = NLA_U32 }, + [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK), [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; @@ -1936,10 +1936,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[NDA_FLAGS_EXT]) { u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); - if (ext & ~NTF_EXT_MASK) { - NL_SET_ERR_MSG(extack, "Invalid extended flags"); - goto out; - } BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE < (sizeof(ndm->ndm_flags) * BITS_PER_BYTE + hweight32(NTF_EXT_MASK))); From 30fc7efa38f21afa48b0be6bf2053e4c10ae2c78 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 Oct 2021 15:21:40 +0200 Subject: [PATCH 192/440] net, neigh: Reject creating NUD_PERMANENT with NTF_MANAGED entries The combination of NUD_PERMANENT + NTF_MANAGED is not supported and does not make sense either given the former indicates a static/fixed neighbor entry whereas the latter a dynamically resolved one. While it is possible to transition from one over to the other, we should however reject such creation attempts. Fixes: 7482e3841d52 ("net, neigh: Add NTF_MANAGED flag for managed neighbor entries") Suggested-by: David Ahern Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 922b9ed0fe76..47931c8be04b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1999,15 +1999,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { - bool exempt_from_gc; + bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT; + bool exempt_from_gc = ndm_permanent || + ndm_flags & NTF_EXT_LEARNED; if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } + if (ndm_permanent && (ndm_flags & NTF_MANAGED)) { + NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry"); + err = -EINVAL; + goto out; + } - exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || - ndm_flags & NTF_EXT_LEARNED; neigh = ___neigh_create(tbl, dst, dev, ndm_flags & (NTF_EXT_LEARNED | NTF_MANAGED), From 78e0a006914b9fc0dd714d68f0bb6e0f50c944f2 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 14 Oct 2021 01:26:26 +0000 Subject: [PATCH 193/440] hv_netvsc: Add comment of netvsc_xdp_xmit() Adding comment to avoid the misusing of netvsc_xdp_xmit(). Otherwise the value of skb->queue_mapping could be 0 and then the return value of skb_get_rx_queue() could be MAX_U16 cause by overflow. Signed-off-by: Jiasheng Jiang Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/1634174786-1810351-1-git-send-email-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 65850ea8a924..7e66ae1d2a59 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -803,6 +803,7 @@ void netvsc_linkstatus_callback(struct net_device *net, schedule_delayed_work(&ndev_ctx->dwork, 0); } +/* This function should only be called after skb_record_rx_queue() */ static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev) { int rc; From cd2621d07d517473611b170c69beb6524c677740 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Oct 2021 22:05:00 -0700 Subject: [PATCH 194/440] net: fealnx: fix build for UML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On i386, when builtin (not a loadable module), the fealnx driver inspects boot_cpu_data to see what CPU family it is running on, and then acts on that data. The "family" struct member (x86) does not exist when running on UML, so prevent that test and do the default action. Prevents this build error on UML + i386: ../drivers/net/ethernet/fealnx.c: In function ‘netdev_open’: ../drivers/net/ethernet/fealnx.c:861:19: error: ‘struct cpuinfo_um’ has no member named ‘x86’ Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Randy Dunlap Cc: linux-um@lists.infradead.org Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Link: https://lore.kernel.org/r/20211014050500.5620-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/fealnx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 63c935e7b625..ab194c9b0691 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -857,7 +857,7 @@ static int netdev_open(struct net_device *dev) np->bcrvalue |= 0x04; /* big-endian */ #endif -#if defined(__i386__) && !defined(MODULE) +#if defined(__i386__) && !defined(MODULE) && !defined(CONFIG_UML) if (boot_cpu_data.x86 <= 4) np->crvalue = 0xa00; else From 523994ba3ad1b7b55abe4a72e156897b5e2db825 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Oct 2021 22:05:16 -0700 Subject: [PATCH 195/440] net: intel: igc_ptp: fix build for UML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a UML build, the igc_ptp driver uses CONFIG_X86_TSC for timestamp conversion. The function that is used is not available on UML builds, so have the function use the default system_counterval_t timestamp instead for UML builds. Prevents this build error on UML: ../drivers/net/ethernet/intel/igc/igc_ptp.c: In function ‘igc_device_tstamp_to_system’: ../drivers/net/ethernet/intel/igc/igc_ptp.c:777:9: error: implicit declaration of function ‘convert_art_ns_to_tsc’ [-Werror=implicit-function-declaration] return convert_art_ns_to_tsc(tstamp); ../drivers/net/ethernet/intel/igc/igc_ptp.c:777:9: error: incompatible types when returning type ‘int’ but ‘struct system_counterval_t’ was expected return convert_art_ns_to_tsc(tstamp); Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Randy Dunlap Cc: linux-um@lists.infradead.org Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: intel-wired-lan@lists.osuosl.org Link: https://lore.kernel.org/r/20211014050516.6846-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 0f021909b430..30568e3544cd 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -773,7 +773,7 @@ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) { -#if IS_ENABLED(CONFIG_X86_TSC) +#if IS_ENABLED(CONFIG_X86_TSC) && !defined(CONFIG_UML) return convert_art_ns_to_tsc(tstamp); #else return (struct system_counterval_t) { }; From a3d708925fcca1a2f7219bc9ce93e6341f85c1e0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Oct 2021 22:06:06 -0700 Subject: [PATCH 196/440] net: tulip: winbond-840: fix build for UML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On i386, when builtin (not a loadable module), the winbond-840 driver inspects boot_cpu_data to see what CPU family it is running on, and then acts on that data. The "family" struct member (x86) does not exist when running on UML, so prevent that test and do the default action. Prevents this build error on UML + i386: ../drivers/net/ethernet/dec/tulip/winbond-840.c: In function ‘init_registers’: ../drivers/net/ethernet/dec/tulip/winbond-840.c:882:19: error: ‘struct cpuinfo_um’ has no member named ‘x86’ if (boot_cpu_data.x86 <= 4) { Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Randy Dunlap Cc: linux-um@lists.infradead.org Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Link: https://lore.kernel.org/r/20211014050606.7288-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dec/tulip/winbond-840.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index c4217ca5d709..86b1d23eba83 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -879,7 +879,7 @@ static void init_registers(struct net_device *dev) 8000 16 longwords 0200 2 longwords 2000 32 longwords C000 32 longwords 0400 4 longwords */ -#if defined (__i386__) && !defined(MODULE) +#if defined (__i386__) && !defined(MODULE) && !defined(CONFIG_UML) /* When not a module we can work around broken '486 PCI boards. */ if (boot_cpu_data.x86 <= 4) { i |= 0x4800; From 4826090719d4bb525b101f1916c68c3b5594910c Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 13 Oct 2021 11:26:19 +0530 Subject: [PATCH 197/440] octeontx2-af: Enable CPT HW interrupts This patch enables and registers interrupt handler for CPT HW interrupts. Signed-off-by: Srujana Challa Reported-by: kernel test robot Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 13 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 + .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 230 ++++++++++++++++++ .../marvell/octeontx2/af/rvu_struct.h | 18 ++ 4 files changed, 264 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 0a1e9f6e216a..7698a67f6465 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -854,6 +854,7 @@ static int rvu_setup_nix_hw_resource(struct rvu *rvu, int blkaddr) block->lfcfg_reg = NIX_PRIV_LFX_CFG; block->msixcfg_reg = NIX_PRIV_LFX_INT_CFG; block->lfreset_reg = NIX_AF_LF_RST; + block->rvu = rvu; sprintf(block->name, "NIX%d", blkid); rvu->nix_blkaddr[blkid] = blkaddr; return rvu_alloc_bitmap(&block->lf); @@ -883,6 +884,7 @@ static int rvu_setup_cpt_hw_resource(struct rvu *rvu, int blkaddr) block->lfcfg_reg = CPT_PRIV_LFX_CFG; block->msixcfg_reg = CPT_PRIV_LFX_INT_CFG; block->lfreset_reg = CPT_AF_LF_RST; + block->rvu = rvu; sprintf(block->name, "CPT%d", blkid); return rvu_alloc_bitmap(&block->lf); } @@ -940,6 +942,7 @@ static int rvu_setup_hw_resources(struct rvu *rvu) block->lfcfg_reg = NPA_PRIV_LFX_CFG; block->msixcfg_reg = NPA_PRIV_LFX_INT_CFG; block->lfreset_reg = NPA_AF_LF_RST; + block->rvu = rvu; sprintf(block->name, "NPA"); err = rvu_alloc_bitmap(&block->lf); if (err) { @@ -979,6 +982,7 @@ nix: block->lfcfg_reg = SSO_PRIV_LFX_HWGRP_CFG; block->msixcfg_reg = SSO_PRIV_LFX_HWGRP_INT_CFG; block->lfreset_reg = SSO_AF_LF_HWGRP_RST; + block->rvu = rvu; sprintf(block->name, "SSO GROUP"); err = rvu_alloc_bitmap(&block->lf); if (err) { @@ -1003,6 +1007,7 @@ ssow: block->lfcfg_reg = SSOW_PRIV_LFX_HWS_CFG; block->msixcfg_reg = SSOW_PRIV_LFX_HWS_INT_CFG; block->lfreset_reg = SSOW_AF_LF_HWS_RST; + block->rvu = rvu; sprintf(block->name, "SSOWS"); err = rvu_alloc_bitmap(&block->lf); if (err) { @@ -1028,6 +1033,7 @@ tim: block->lfcfg_reg = TIM_PRIV_LFX_CFG; block->msixcfg_reg = TIM_PRIV_LFX_INT_CFG; block->lfreset_reg = TIM_AF_LF_RST; + block->rvu = rvu; sprintf(block->name, "TIM"); err = rvu_alloc_bitmap(&block->lf); if (err) { @@ -2724,6 +2730,8 @@ static void rvu_unregister_interrupts(struct rvu *rvu) { int irq; + rvu_cpt_unregister_interrupts(rvu); + /* Disable the Mbox interrupt */ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT_ENA_W1C, INTR_MASK(rvu->hw->total_pfs) & ~1ULL); @@ -2933,6 +2941,11 @@ static int rvu_register_interrupts(struct rvu *rvu) goto fail; } rvu->irq_allocated[offset] = true; + + ret = rvu_cpt_register_interrupts(rvu); + if (ret) + goto fail; + return 0; fail: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 58b166698fa5..cdbd2846127d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -101,6 +101,7 @@ struct rvu_block { u64 msixcfg_reg; u64 lfreset_reg; unsigned char name[NAME_SIZE]; + struct rvu *rvu; }; struct nix_mcast { @@ -812,6 +813,8 @@ bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index); /* CPT APIs */ +int rvu_cpt_register_interrupts(struct rvu *rvu); +void rvu_cpt_unregister_interrupts(struct rvu *rvu); int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); /* CN10K RVU */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 267d092b8e97..259131435edc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -37,6 +37,236 @@ (_rsp)->free_sts_##etype = free_sts; \ }) +static irqreturn_t rvu_cpt_af_flt_intr_handler(int irq, void *ptr) +{ + struct rvu_block *block = ptr; + struct rvu *rvu = block->rvu; + int blkaddr = block->addr; + u64 reg0, reg1, reg2; + + reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); + reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); + if (!is_rvu_otx2(rvu)) { + reg2 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(2)); + dev_err_ratelimited(rvu->dev, + "Received CPTAF FLT irq : 0x%llx, 0x%llx, 0x%llx", + reg0, reg1, reg2); + } else { + dev_err_ratelimited(rvu->dev, + "Received CPTAF FLT irq : 0x%llx, 0x%llx", + reg0, reg1); + } + + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(0), reg0); + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(1), reg1); + if (!is_rvu_otx2(rvu)) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT(2), reg2); + + return IRQ_HANDLED; +} + +static irqreturn_t rvu_cpt_af_rvu_intr_handler(int irq, void *ptr) +{ + struct rvu_block *block = ptr; + struct rvu *rvu = block->rvu; + int blkaddr = block->addr; + u64 reg; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_RVU_INT); + dev_err_ratelimited(rvu->dev, "Received CPTAF RVU irq : 0x%llx", reg); + + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT, reg); + return IRQ_HANDLED; +} + +static irqreturn_t rvu_cpt_af_ras_intr_handler(int irq, void *ptr) +{ + struct rvu_block *block = ptr; + struct rvu *rvu = block->rvu; + int blkaddr = block->addr; + u64 reg; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_RAS_INT); + dev_err_ratelimited(rvu->dev, "Received CPTAF RAS irq : 0x%llx", reg); + + rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT, reg); + return IRQ_HANDLED; +} + +static int rvu_cpt_do_register_interrupt(struct rvu_block *block, int irq_offs, + irq_handler_t handler, + const char *name) +{ + struct rvu *rvu = block->rvu; + int ret; + + ret = request_irq(pci_irq_vector(rvu->pdev, irq_offs), handler, 0, + name, block); + if (ret) { + dev_err(rvu->dev, "RVUAF: %s irq registration failed", name); + return ret; + } + + WARN_ON(rvu->irq_allocated[irq_offs]); + rvu->irq_allocated[irq_offs] = true; + return 0; +} + +static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) +{ + struct rvu *rvu = block->rvu; + int blkaddr = block->addr; + int i; + + /* Disable all CPT AF interrupts */ + for (i = 0; i < CPT_10K_AF_INT_VEC_RVU; i++) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); + + for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++) + if (rvu->irq_allocated[off + i]) { + free_irq(pci_irq_vector(rvu->pdev, off + i), block); + rvu->irq_allocated[off + i] = false; + } +} + +static void cpt_unregister_interrupts(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int i, offs; + + if (!is_block_implemented(rvu->hw, blkaddr)) + return; + offs = rvu_read64(rvu, blkaddr, CPT_PRIV_AF_INT_CFG) & 0x7FF; + if (!offs) { + dev_warn(rvu->dev, + "Failed to get CPT_AF_INT vector offsets\n"); + return; + } + block = &hw->block[blkaddr]; + if (!is_rvu_otx2(rvu)) + return cpt_10k_unregister_interrupts(block, offs); + + /* Disable all CPT AF interrupts */ + for (i = 0; i < CPT_AF_INT_VEC_RVU; i++) + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); + rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); + + for (i = 0; i < CPT_AF_INT_VEC_CNT; i++) + if (rvu->irq_allocated[offs + i]) { + free_irq(pci_irq_vector(rvu->pdev, offs + i), block); + rvu->irq_allocated[offs + i] = false; + } +} + +void rvu_cpt_unregister_interrupts(struct rvu *rvu) +{ + cpt_unregister_interrupts(rvu, BLKADDR_CPT0); + cpt_unregister_interrupts(rvu, BLKADDR_CPT1); +} + +static int cpt_10k_register_interrupts(struct rvu_block *block, int off) +{ + struct rvu *rvu = block->rvu; + int blkaddr = block->addr; + char irq_name[16]; + int i, ret; + + for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { + snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + ret = rvu_cpt_do_register_interrupt(block, off + i, + rvu_cpt_af_flt_intr_handler, + irq_name); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + } + + ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, + rvu_cpt_af_rvu_intr_handler, + "CPTAF RVU"); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1); + + ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RAS, + rvu_cpt_af_ras_intr_handler, + "CPTAF RAS"); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1S, 0x1); + + return 0; +err: + rvu_cpt_unregister_interrupts(rvu); + return ret; +} + +static int cpt_register_interrupts(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int i, offs, ret = 0; + char irq_name[16]; + + if (!is_block_implemented(rvu->hw, blkaddr)) + return 0; + + block = &hw->block[blkaddr]; + offs = rvu_read64(rvu, blkaddr, CPT_PRIV_AF_INT_CFG) & 0x7FF; + if (!offs) { + dev_warn(rvu->dev, + "Failed to get CPT_AF_INT vector offsets\n"); + return 0; + } + + if (!is_rvu_otx2(rvu)) + return cpt_10k_register_interrupts(block, offs); + + for (i = CPT_AF_INT_VEC_FLT0; i < CPT_AF_INT_VEC_RVU; i++) { + snprintf(irq_name, sizeof(irq_name), "CPTAF FLT%d", i); + ret = rvu_cpt_do_register_interrupt(block, offs + i, + rvu_cpt_af_flt_intr_handler, + irq_name); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0x1); + } + + ret = rvu_cpt_do_register_interrupt(block, offs + CPT_AF_INT_VEC_RVU, + rvu_cpt_af_rvu_intr_handler, + "CPTAF RVU"); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1); + + ret = rvu_cpt_do_register_interrupt(block, offs + CPT_AF_INT_VEC_RAS, + rvu_cpt_af_ras_intr_handler, + "CPTAF RAS"); + if (ret) + goto err; + rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1S, 0x1); + + return 0; +err: + rvu_cpt_unregister_interrupts(rvu); + return ret; +} + +int rvu_cpt_register_interrupts(struct rvu *rvu) +{ + int ret; + + ret = cpt_register_interrupts(rvu, BLKADDR_CPT0); + if (ret) + return ret; + + return cpt_register_interrupts(rvu, BLKADDR_CPT1); +} + static int get_cpt_pf_num(struct rvu *rvu) { int i, domain_nr, cpt_pf_num = -1; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 77ac96693f04..edc9367b1b95 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -62,6 +62,24 @@ enum rvu_af_int_vec_e { RVU_AF_INT_VEC_CNT = 0x5, }; +/* CPT Admin function Interrupt Vector Enumeration */ +enum cpt_af_int_vec_e { + CPT_AF_INT_VEC_FLT0 = 0x0, + CPT_AF_INT_VEC_FLT1 = 0x1, + CPT_AF_INT_VEC_RVU = 0x2, + CPT_AF_INT_VEC_RAS = 0x3, + CPT_AF_INT_VEC_CNT = 0x4, +}; + +enum cpt_10k_af_int_vec_e { + CPT_10K_AF_INT_VEC_FLT0 = 0x0, + CPT_10K_AF_INT_VEC_FLT1 = 0x1, + CPT_10K_AF_INT_VEC_FLT2 = 0x2, + CPT_10K_AF_INT_VEC_RVU = 0x3, + CPT_10K_AF_INT_VEC_RAS = 0x4, + CPT_10K_AF_INT_VEC_CNT = 0x5, +}; + /* NPA Admin function Interrupt Vector Enumeration */ enum npa_af_int_vec_e { NPA_AF_INT_VEC_RVU = 0x0, From 7054d39ccf7e862ebb569c73ab251f1968d1b640 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Wed, 13 Oct 2021 11:26:20 +0530 Subject: [PATCH 198/440] octeontx2-af: Perform cpt lf teardown in non FLR path Perform CPT LF teardown in non FLR path as well via cpt_lf_free() Currently CPT LF teardown and reset sequence is only done when FLR is handled with CPT LF still attached. This patch also fixes cpt_lf_alloc() to set EXEC_LDWB in CPT_AF_LFX_CTL2 when being completely overwritten as that is the default value and is better for performance. Signed-off-by: Nithin Dabilpuram Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 3 +- .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 +- .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 32 +++++++++++-------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 7698a67f6465..cb56e171ddd4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2532,7 +2532,8 @@ static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr) rvu_npa_lf_teardown(rvu, pcifunc, lf); else if ((block->addr == BLKADDR_CPT0) || (block->addr == BLKADDR_CPT1)) - rvu_cpt_lf_teardown(rvu, pcifunc, lf, slot); + rvu_cpt_lf_teardown(rvu, pcifunc, block->addr, lf, + slot); err = rvu_lf_reset(rvu, block, lf); if (err) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index cdbd2846127d..75aa0b8cfe58 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -815,7 +815,8 @@ bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, /* CPT APIs */ int rvu_cpt_register_interrupts(struct rvu *rvu); void rvu_cpt_unregister_interrupts(struct rvu *rvu); -int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); +int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, + int slot); /* CN10K RVU */ int rvu_set_channels_base(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 259131435edc..7bab20403bba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -377,9 +377,13 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), val); - /* Set CPT LF NIX_PF_FUNC and SSO_PF_FUNC */ - val = (u64)req->nix_pf_func << 48 | - (u64)req->sso_pf_func << 32; + /* Set CPT LF NIX_PF_FUNC and SSO_PF_FUNC. EXE_LDWB is set + * on reset. + */ + val = rvu_read64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf)); + val &= ~(GENMASK_ULL(63, 48) | GENMASK_ULL(47, 32)); + val |= ((u64)req->nix_pf_func << 48 | + (u64)req->sso_pf_func << 32); rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), val); } @@ -389,7 +393,7 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, static int cpt_lf_free(struct rvu *rvu, struct msg_req *req, int blkaddr) { u16 pcifunc = req->hdr.pcifunc; - int num_lfs, cptlf, slot; + int num_lfs, cptlf, slot, err; struct rvu_block *block; block = &rvu->hw->block[blkaddr]; @@ -403,10 +407,15 @@ static int cpt_lf_free(struct rvu *rvu, struct msg_req *req, int blkaddr) if (cptlf < 0) return CPT_AF_ERR_LF_INVALID; - /* Reset CPT LF group and priority */ - rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), 0x0); - /* Reset CPT LF NIX_PF_FUNC and SSO_PF_FUNC */ - rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL2(cptlf), 0x0); + /* Perform teardown */ + rvu_cpt_lf_teardown(rvu, pcifunc, blkaddr, cptlf, slot); + + /* Reset LF */ + err = rvu_lf_reset(rvu, block, cptlf); + if (err) { + dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n", + block->addr, cptlf); + } } return 0; @@ -850,15 +859,10 @@ static void cpt_lf_disable_iqueue(struct rvu *rvu, int blkaddr, int slot) dev_warn(rvu->dev, "CPT FLR hits hard loop counter\n"); } -int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot) +int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot) { - int blkaddr; u64 reg; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, pcifunc); - if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) - return -EINVAL; - /* Enable BAR2 ALIAS for this pcifunc. */ reg = BIT_ULL(16) | pcifunc; rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, reg); From 149f3b73cb66b231bf818d94c378b906c5d652c4 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 13 Oct 2021 11:26:21 +0530 Subject: [PATCH 199/440] octeontx2-af: Add support to flush full CPT CTX cache Adds support to flush or invalidate CPT CTX entries as part of FLR and also provides a mailbox to flush CPT CTX entries in case of graceful exit. This patch also adds support for AF -> CPT PF uplink mailbox messages and adds a new mbox message to submit a CPT instruction from AF. Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 12 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 206 ++++++++++++++++++ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 11 + .../ethernet/marvell/octeontx2/af/rvu_reg.h | 2 + 5 files changed, 232 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index dfe487235007..4e79e918a161 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -191,6 +191,7 @@ M(CPT_INLINE_IPSEC_CFG, 0xA04, cpt_inline_ipsec_cfg, \ M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \ M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ msg_rsp) \ +M(CPT_CTX_CACHE_SYNC, 0xA07, cpt_ctx_cache_sync, msg_req, msg_rsp) \ /* SDP mbox IDs (range 0x1000 - 0x11FF) */ \ M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \ M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \ @@ -292,10 +293,14 @@ M(NIX_BANDPROF_GET_HWINFO, 0x801f, nix_bandprof_get_hwinfo, msg_req, \ #define MBOX_UP_CGX_MESSAGES \ M(CGX_LINK_EVENT, 0xC00, cgx_link_event, cgx_link_info_msg, msg_rsp) +#define MBOX_UP_CPT_MESSAGES \ +M(CPT_INST_LMTST, 0xD00, cpt_inst_lmtst, cpt_inst_lmtst_req, msg_rsp) + enum { #define M(_name, _id, _1, _2, _3) MBOX_MSG_ ## _name = _id, MBOX_MESSAGES MBOX_UP_CGX_MESSAGES +MBOX_UP_CPT_MESSAGES #undef M }; @@ -1562,6 +1567,13 @@ struct cpt_rxc_time_cfg_req { u16 active_limit; }; +/* Mailbox message request format to request for CPT_INST_S lmtst. */ +struct cpt_inst_lmtst_req { + struct mbox_msghdr hdr; + u64 inst[8]; + u64 rsvd; +}; + struct sdp_node_info { /* Node to which this PF belons to */ u8 node_id; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 75aa0b8cfe58..66e45d733824 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -817,6 +817,7 @@ int rvu_cpt_register_interrupts(struct rvu *rvu); void rvu_cpt_unregister_interrupts(struct rvu *rvu); int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); +int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); /* CN10K RVU */ int rvu_set_channels_base(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 7bab20403bba..45357deecabb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -795,6 +795,58 @@ int rvu_mbox_handler_cpt_rxc_time_cfg(struct rvu *rvu, return 0; } +int rvu_mbox_handler_cpt_ctx_cache_sync(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return rvu_cpt_ctx_flush(rvu, req->hdr.pcifunc); +} + +static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) +{ + struct cpt_rxc_time_cfg_req req; + int timeout = 2000; + u64 reg; + + if (is_rvu_otx2(rvu)) + return; + + /* Set time limit to minimum values, so that rxc entries will be + * flushed out quickly. + */ + req.step = 1; + req.zombie_thres = 1; + req.zombie_limit = 1; + req.active_thres = 1; + req.active_limit = 1; + + cpt_rxc_time_cfg(rvu, &req, blkaddr); + + do { + reg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS); + udelay(1); + if (FIELD_GET(RXC_ACTIVE_COUNT, reg)) + timeout--; + else + break; + } while (timeout); + + if (timeout == 0) + dev_warn(rvu->dev, "Poll for RXC active count hits hard loop counter\n"); + + timeout = 2000; + do { + reg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS); + udelay(1); + if (FIELD_GET(RXC_ZOMBIE_COUNT, reg)) + timeout--; + else + break; + } while (timeout); + + if (timeout == 0) + dev_warn(rvu->dev, "Poll for RXC zombie count hits hard loop counter\n"); +} + #define INPROG_INFLIGHT(reg) ((reg) & 0x1FF) #define INPROG_GRB_PARTIAL(reg) ((reg) & BIT_ULL(31)) #define INPROG_GRB(reg) (((reg) >> 32) & 0xFF) @@ -863,6 +915,9 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int s { u64 reg; + if (is_cpt_pf(rvu, pcifunc) || is_cpt_vf(rvu, pcifunc)) + cpt_rxc_teardown(rvu, blkaddr); + /* Enable BAR2 ALIAS for this pcifunc. */ reg = BIT_ULL(16) | pcifunc; rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, reg); @@ -878,3 +933,154 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int s return 0; } + +#define CPT_RES_LEN 16 +#define CPT_SE_IE_EGRP 1ULL + +static int cpt_inline_inb_lf_cmd_send(struct rvu *rvu, int blkaddr, + int nix_blkaddr) +{ + int cpt_pf_num = get_cpt_pf_num(rvu); + struct cpt_inst_lmtst_req *req; + dma_addr_t res_daddr; + int timeout = 3000; + u8 cpt_idx; + u64 *inst; + u16 *res; + int rc; + + res = kzalloc(CPT_RES_LEN, GFP_KERNEL); + if (!res) + return -ENOMEM; + + res_daddr = dma_map_single(rvu->dev, res, CPT_RES_LEN, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(rvu->dev, res_daddr)) { + dev_err(rvu->dev, "DMA mapping failed for CPT result\n"); + rc = -EFAULT; + goto res_free; + } + *res = 0xFFFF; + + /* Send mbox message to CPT PF */ + req = (struct cpt_inst_lmtst_req *) + otx2_mbox_alloc_msg_rsp(&rvu->afpf_wq_info.mbox_up, + cpt_pf_num, sizeof(*req), + sizeof(struct msg_rsp)); + if (!req) { + rc = -ENOMEM; + goto res_daddr_unmap; + } + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.id = MBOX_MSG_CPT_INST_LMTST; + + inst = req->inst; + /* Prepare CPT_INST_S */ + inst[0] = 0; + inst[1] = res_daddr; + /* AF PF FUNC */ + inst[2] = 0; + /* Set QORD */ + inst[3] = 1; + inst[4] = 0; + inst[5] = 0; + inst[6] = 0; + /* Set EGRP */ + inst[7] = CPT_SE_IE_EGRP << 61; + + /* Subtract 1 from the NIX-CPT credit count to preserve + * credit counts. + */ + cpt_idx = (blkaddr == BLKADDR_CPT0) ? 0 : 1; + rvu_write64(rvu, nix_blkaddr, NIX_AF_RX_CPTX_CREDIT(cpt_idx), + BIT_ULL(22) - 1); + + otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, cpt_pf_num); + rc = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, cpt_pf_num); + if (rc) + dev_warn(rvu->dev, "notification to pf %d failed\n", + cpt_pf_num); + /* Wait for CPT instruction to be completed */ + do { + mdelay(1); + if (*res == 0xFFFF) + timeout--; + else + break; + } while (timeout); + + if (timeout == 0) + dev_warn(rvu->dev, "Poll for result hits hard loop counter\n"); + +res_daddr_unmap: + dma_unmap_single(rvu->dev, res_daddr, CPT_RES_LEN, DMA_BIDIRECTIONAL); +res_free: + kfree(res); + + return 0; +} + +#define CTX_CAM_PF_FUNC GENMASK_ULL(61, 46) +#define CTX_CAM_CPTR GENMASK_ULL(45, 0) + +int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc) +{ + int nix_blkaddr, blkaddr; + u16 max_ctx_entries, i; + int slot = 0, num_lfs; + u64 reg, cam_data; + int rc; + + nix_blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (nix_blkaddr < 0) + return -EINVAL; + + if (is_rvu_otx2(rvu)) + return 0; + + blkaddr = (nix_blkaddr == BLKADDR_NIX1) ? BLKADDR_CPT1 : BLKADDR_CPT0; + + /* Submit CPT_INST_S to track when all packets have been + * flushed through for the NIX PF FUNC in inline inbound case. + */ + rc = cpt_inline_inb_lf_cmd_send(rvu, blkaddr, nix_blkaddr); + if (rc) + return rc; + + /* Wait for rxc entries to be flushed out */ + cpt_rxc_teardown(rvu, blkaddr); + + reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS0); + max_ctx_entries = (reg >> 48) & 0xFFF; + + mutex_lock(&rvu->rsrc_lock); + + num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), + blkaddr); + if (num_lfs == 0) { + dev_warn(rvu->dev, "CPT LF is not configured\n"); + goto unlock; + } + + /* Enable BAR2 ALIAS for this pcifunc. */ + reg = BIT_ULL(16) | pcifunc; + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, reg); + + for (i = 0; i < max_ctx_entries; i++) { + cam_data = rvu_read64(rvu, blkaddr, CPT_AF_CTX_CAM_DATA(i)); + + if ((FIELD_GET(CTX_CAM_PF_FUNC, cam_data) == pcifunc) && + FIELD_GET(CTX_CAM_CPTR, cam_data)) { + reg = BIT_ULL(46) | FIELD_GET(CTX_CAM_CPTR, cam_data); + rvu_write64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, CPT_LF_CTX_FLUSH), + reg); + } + } + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, 0); + +unlock: + mutex_unlock(&rvu->rsrc_lock); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 67feb26792e4..7761dcf17b91 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4512,6 +4512,8 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, return rvu_cgx_start_stop_io(rvu, pcifunc, false); } +#define RX_SA_BASE GENMASK_ULL(52, 7) + void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -4519,6 +4521,7 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) int pf = rvu_get_pf(pcifunc); struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + u64 sa_base; void *cgxd; int err; @@ -4575,6 +4578,14 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); + + sa_base = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RX_IPSEC_SA_BASE(nixlf)); + if (FIELD_GET(RX_SA_BASE, sa_base)) { + err = rvu_cpt_ctx_flush(rvu, pcifunc); + if (err) + dev_err(rvu->dev, + "CPT ctx flush failed with error: %d\n", err); + } } #define NIX_AF_LFX_TX_CFG_PTP_EN BIT_ULL(32) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index dbaeb10de7c2..22cd751613cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -527,6 +527,7 @@ #define CPT_AF_CTX_WBACK_LATENCY_PC (0x49448ull) #define CPT_AF_CTX_PSH_PC (0x49450ull) #define CPT_AF_CTX_PSH_LATENCY_PC (0x49458ull) +#define CPT_AF_CTX_CAM_DATA(a) (0x49800ull | (u64)(a) << 3) #define CPT_AF_RXC_TIME (0x50010ull) #define CPT_AF_RXC_TIME_CFG (0x50018ull) #define CPT_AF_RXC_DFRG (0x50020ull) @@ -544,6 +545,7 @@ #define CPT_LF_CTL 0x10 #define CPT_LF_INPROG 0x40 #define CPT_LF_Q_GRP_PTR 0x120 +#define CPT_LF_CTX_FLUSH 0x510 #define NPC_AF_BLK_RST (0x00040) From d00e60ee54b12de945b8493cf18c1ada9e422514 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 13 Oct 2021 17:19:20 +0800 Subject: [PATCH 200/440] page_pool: disable dma mapping support for 32-bit arch with 64-bit DMA As the 32-bit arch with 64-bit DMA seems to rare those days, and page pool might carry a lot of code and complexity for systems that possibly. So disable dma mapping support for such systems, if drivers really want to work on such systems, they have to implement their own DMA-mapping fallback tracking outside page_pool. Reviewed-by: Ilias Apalodimas Signed-off-by: Yunsheng Lin Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/mm_types.h | 13 +------------ include/net/page_pool.h | 12 +----------- net/core/page_pool.c | 10 ++++++---- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f8ee09c711f..436e0946d691 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -104,18 +104,7 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_frag_count; }; struct { /* slab, slob and slub */ union { diff --git a/include/net/page_pool.h b/include/net/page_pool.h index a4082406a003..3855f069627f 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -216,24 +216,14 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ - (sizeof(dma_addr_t) > sizeof(unsigned long)) - static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; - - return ret; + return page->dma_addr; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); } static inline void page_pool_set_frag_count(struct page *page, long nr) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1a6978427d6c..9b60e4301a44 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -49,6 +49,12 @@ static int page_pool_init(struct page_pool *pool, * which is the XDP_TX use-case. */ if (pool->p.flags & PP_FLAG_DMA_MAP) { + /* DMA-mapping is not supported on 32-bit systems with + * 64-bit DMA mapping. + */ + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + return -EOPNOTSUPP; + if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; @@ -69,10 +75,6 @@ static int page_pool_init(struct page_pool *pool, */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; From bacc8daf97d4199316328a5d18eeafbe447143c5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 13 Oct 2021 17:51:42 +0100 Subject: [PATCH 201/440] xen-netback: Remove redundant initialization of variable err The variable err is being initialized with a value that is never read, it is being updated immediately afterwards. The assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 32d5bc4919d8..0f7fd159f0f2 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1474,7 +1474,7 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; RING_IDX rsp_prod, req_prod; - int err = -ENOMEM; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), &tx_ring_ref, 1, &addr); From d8b6f5bae6d3b648a67b6958cb98e4e97256d652 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:06 +0200 Subject: [PATCH 202/440] dsa: qca8k: add mac_power_sel support Add missing mac power sel support needed for ipq8064/5 SoC that require 1.8v for the internal regulator port instead of the default 1.5v. If other device needs this, consider adding a dedicated binding to support this. Signed-off-by: Ansuel Smith Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 31 +++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 5 +++++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index a984f06f6f04..db97f45b563c 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -950,6 +950,33 @@ qca8k_setup_of_rgmii_delay(struct qca8k_priv *priv) return 0; } +static int +qca8k_setup_mac_pwr_sel(struct qca8k_priv *priv) +{ + u32 mask = 0; + int ret = 0; + + /* SoC specific settings for ipq8064. + * If more device require this consider adding + * a dedicated binding. + */ + if (of_machine_is_compatible("qcom,ipq8064")) + mask |= QCA8K_MAC_PWR_RGMII0_1_8V; + + /* SoC specific settings for ipq8065 */ + if (of_machine_is_compatible("qcom,ipq8065")) + mask |= QCA8K_MAC_PWR_RGMII1_1_8V; + + if (mask) { + ret = qca8k_rmw(priv, QCA8K_REG_MAC_PWR_SEL, + QCA8K_MAC_PWR_RGMII0_1_8V | + QCA8K_MAC_PWR_RGMII1_1_8V, + mask); + } + + return ret; +} + static int qca8k_setup(struct dsa_switch *ds) { @@ -979,6 +1006,10 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; + ret = qca8k_setup_mac_pwr_sel(priv); + if (ret) + return ret; + /* Enable CPU Port */ ret = qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index ed3b05ad6745..fc7db94cc0c9 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -100,6 +100,11 @@ #define QCA8K_SGMII_MODE_CTRL_PHY (1 << 22) #define QCA8K_SGMII_MODE_CTRL_MAC (2 << 22) +/* MAC_PWR_SEL registers */ +#define QCA8K_REG_MAC_PWR_SEL 0x0e4 +#define QCA8K_MAC_PWR_RGMII1_1_8V BIT(18) +#define QCA8K_MAC_PWR_RGMII0_1_8V BIT(19) + /* EEE control registers */ #define QCA8K_REG_EEE_CTRL 0x100 #define QCA8K_REG_EEE_CTRL_LPI_EN(_i) ((_i + 1) * 2) From fdbf35df9c091db9c46e57e9938e3f7a4f603a7c Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:07 +0200 Subject: [PATCH 203/440] dt-bindings: net: dsa: qca8k: Add SGMII clock phase properties Add names and descriptions of additional PORT0_PAD_CTRL properties. qca,sgmii-(rx|tx)clk-falling-edge are for setting the respective clock phase to failling edge. Co-developed-by: Matthew Hagan Signed-off-by: Matthew Hagan Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 8c73f67c43ca..cc214e655442 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -37,6 +37,10 @@ A CPU port node has the following optional node: managed entity. See Documentation/devicetree/bindings/net/fixed-link.txt for details. +- qca,sgmii-rxclk-falling-edge: Set the receive clock phase to falling edge. + Mostly used in qca8327 with CPU port 0 set to + sgmii. +- qca,sgmii-txclk-falling-edge: Set the transmit clock phase to falling edge. For QCA8K the 'fixed-link' sub-node supports only the following properties: From 6c43809bf1bee76c434e365a26546a92a5fbec14 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:08 +0200 Subject: [PATCH 204/440] net: dsa: qca8k: add support for sgmii falling edge Add support for this in the qca8k driver. Also add support for SGMII rx/tx clock falling edge. This is only present for pad0, pad5 and pad6 have these bit reserved from Documentation. Add a comment that this is hardcoded to PAD0 as qca8327/28/34/37 have an unique sgmii line and setting falling in port0 applies to both configuration with sgmii used for port0 or port6. Co-developed-by: Matthew Hagan Signed-off-by: Matthew Hagan Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 63 +++++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 4 +++ 2 files changed, 67 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index db97f45b563c..668951c54045 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -977,6 +977,42 @@ qca8k_setup_mac_pwr_sel(struct qca8k_priv *priv) return ret; } +static int +qca8k_parse_port_config(struct qca8k_priv *priv) +{ + struct device_node *port_dn; + phy_interface_t mode; + struct dsa_port *dp; + int port, ret; + + /* We have 2 CPU port. Check them */ + for (port = 0; port < QCA8K_NUM_PORTS; port++) { + /* Skip every other port */ + if (port != 0 && port != 6) + continue; + + dp = dsa_to_port(priv->ds, port); + port_dn = dp->dn; + + if (!of_device_is_available(port_dn)) + continue; + + ret = of_get_phy_mode(port_dn, &mode); + if (ret) + continue; + + if (mode == PHY_INTERFACE_MODE_SGMII) { + if (of_property_read_bool(port_dn, "qca,sgmii-txclk-falling-edge")) + priv->sgmii_tx_clk_falling_edge = true; + + if (of_property_read_bool(port_dn, "qca,sgmii-rxclk-falling-edge")) + priv->sgmii_rx_clk_falling_edge = true; + } + } + + return 0; +} + static int qca8k_setup(struct dsa_switch *ds) { @@ -990,6 +1026,11 @@ qca8k_setup(struct dsa_switch *ds) return -EINVAL; } + /* Parse CPU port config to be later used in phy_link mac_config */ + ret = qca8k_parse_port_config(priv); + if (ret) + return ret; + mutex_init(&priv->reg_mutex); /* Start by setting up the register mapping */ @@ -1274,6 +1315,28 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, } qca8k_write(priv, QCA8K_REG_SGMII_CTRL, val); + + /* For qca8327/qca8328/qca8334/qca8338 sgmii is unique and + * falling edge is set writing in the PORT0 PAD reg + */ + if (priv->switch_id == QCA8K_ID_QCA8327 || + priv->switch_id == QCA8K_ID_QCA8337) + reg = QCA8K_REG_PORT0_PAD_CTRL; + + val = 0; + + /* SGMII Clock phase configuration */ + if (priv->sgmii_rx_clk_falling_edge) + val |= QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE; + + if (priv->sgmii_tx_clk_falling_edge) + val |= QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE; + + if (val) + ret = qca8k_rmw(priv, reg, + QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE | + QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE, + val); break; default: dev_err(ds->dev, "xMII mode %s not supported for port %d\n", diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index fc7db94cc0c9..bc9c89dd7e71 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -35,6 +35,8 @@ #define QCA8K_MASK_CTRL_DEVICE_ID_MASK GENMASK(15, 8) #define QCA8K_MASK_CTRL_DEVICE_ID(x) ((x) >> 8) #define QCA8K_REG_PORT0_PAD_CTRL 0x004 +#define QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE BIT(19) +#define QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE BIT(18) #define QCA8K_REG_PORT5_PAD_CTRL 0x008 #define QCA8K_REG_PORT6_PAD_CTRL 0x00c #define QCA8K_PORT_PAD_RGMII_EN BIT(26) @@ -260,6 +262,8 @@ struct qca8k_priv { u8 switch_revision; u8 rgmii_tx_delay; u8 rgmii_rx_delay; + bool sgmii_rx_clk_falling_edge; + bool sgmii_tx_clk_falling_edge; bool legacy_phy_port_mapping; struct regmap *regmap; struct mii_bus *bus; From 731d613338ec6de482053ffa3f71be2325b0f8eb Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:09 +0200 Subject: [PATCH 205/440] dt-bindings: net: dsa: qca8k: Document support for CPU port 6 The switch now support CPU port to be set 6 instead of be hardcoded to 0. Document support for it and describe logic selection. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index cc214e655442..aeb206556f54 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -29,7 +29,11 @@ the mdio MASTER is used as communication. Don't use mixed external and internal mdio-bus configurations, as this is not supported by the hardware. -The CPU port of this switch is always port 0. +This switch support 2 CPU port. Normally and advised configuration is with +CPU port set to port 0. It is also possible to set the CPU port to port 6 +if the device requires it. The driver will configure the switch to the defined +port. With both CPU port declared the first CPU port is selected as primary +and the secondary CPU ignored. A CPU port node has the following optional node: From 3fcf734aa482487df83cf8f18608438fcf59127f Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:10 +0200 Subject: [PATCH 206/440] net: dsa: qca8k: add support for cpu port 6 Currently CPU port is always hardcoded to port 0. This switch have 2 CPU ports. The original intention of this driver seems to be use the mac06_exchange bit to swap MAC0 with MAC6 in the strange configuration where device have connected only the CPU port 6. To skip the introduction of a new binding, rework the driver to address the secondary CPU port as primary and drop any reference of hardcoded port. With configuration of mac06 exchange, just skip the definition of port0 and define the CPU port as a secondary. The driver will autoconfigure the switch to use that as the primary CPU port. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 51 +++++++++++++++++++++++++++++------------ drivers/net/dsa/qca8k.h | 2 -- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 668951c54045..55118450347f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -977,6 +977,22 @@ qca8k_setup_mac_pwr_sel(struct qca8k_priv *priv) return ret; } +static int qca8k_find_cpu_port(struct dsa_switch *ds) +{ + struct qca8k_priv *priv = ds->priv; + + /* Find the connected cpu port. Valid port are 0 or 6 */ + if (dsa_is_cpu_port(ds, 0)) + return 0; + + dev_dbg(priv->dev, "port 0 is not the CPU port. Checking port 6"); + + if (dsa_is_cpu_port(ds, 6)) + return 6; + + return -EINVAL; +} + static int qca8k_parse_port_config(struct qca8k_priv *priv) { @@ -1017,13 +1033,13 @@ static int qca8k_setup(struct dsa_switch *ds) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - int ret, i; + int cpu_port, ret, i; u32 mask; - /* Make sure that port 0 is the cpu port */ - if (!dsa_is_cpu_port(ds, 0)) { - dev_err(priv->dev, "port 0 is not the CPU port"); - return -EINVAL; + cpu_port = qca8k_find_cpu_port(ds); + if (cpu_port < 0) { + dev_err(priv->dev, "No cpu port configured in both cpu port0 and port6"); + return cpu_port; } /* Parse CPU port config to be later used in phy_link mac_config */ @@ -1065,7 +1081,7 @@ qca8k_setup(struct dsa_switch *ds) dev_warn(priv->dev, "mib init failed"); /* Enable QCA header mode on the cpu port */ - ret = qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT), + ret = qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(cpu_port), QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); if (ret) { @@ -1087,10 +1103,10 @@ qca8k_setup(struct dsa_switch *ds) /* Forward all unknown frames to CPU port for Linux processing */ ret = qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1, - BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | - BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | - BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S | - BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); + BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | + BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | + BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S | + BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); if (ret) return ret; @@ -1098,7 +1114,7 @@ qca8k_setup(struct dsa_switch *ds) for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { - ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), + ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(cpu_port), QCA8K_PORT_LOOKUP_MEMBER, dsa_user_ports(ds)); if (ret) return ret; @@ -1110,7 +1126,7 @@ qca8k_setup(struct dsa_switch *ds) ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), QCA8K_PORT_LOOKUP_MEMBER, - BIT(QCA8K_CPU_PORT)); + BIT(cpu_port)); if (ret) return ret; @@ -1616,9 +1632,12 @@ static int qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - int port_mask = BIT(QCA8K_CPU_PORT); + int port_mask, cpu_port; int i, ret; + cpu_port = dsa_to_port(ds, port)->cpu_dp->index; + port_mask = BIT(cpu_port); + for (i = 1; i < QCA8K_NUM_PORTS; i++) { if (dsa_to_port(ds, i)->bridge_dev != br) continue; @@ -1645,7 +1664,9 @@ static void qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - int i; + int cpu_port, i; + + cpu_port = dsa_to_port(ds, port)->cpu_dp->index; for (i = 1; i < QCA8K_NUM_PORTS; i++) { if (dsa_to_port(ds, i)->bridge_dev != br) @@ -1662,7 +1683,7 @@ qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) * this port */ qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), - QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT)); + QCA8K_PORT_LOOKUP_MEMBER, BIT(cpu_port)); } static int diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index bc9c89dd7e71..781521e6a965 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -24,8 +24,6 @@ #define QCA8K_NUM_FDB_RECORDS 2048 -#define QCA8K_CPU_PORT 0 - #define QCA8K_PORT_VID_DEF 1 /* Global control registers */ From 5654ec78dd7e64b1e04777b24007344329e6a63b Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:11 +0200 Subject: [PATCH 207/440] net: dsa: qca8k: rework rgmii delay logic and scan for cpu port 6 Future proof commit. This switch have 2 CPU ports and one valid configuration is first CPU port set to sgmii and second CPU port set to rgmii-id. The current implementation detects delay only for CPU port zero set to rgmii and doesn't count any delay set in a secondary CPU port. Drop the current delay scan function and move it to the sgmii parser function to generalize and implicitly add support for secondary CPU port set to rgmii-id. Introduce new logic where delay is enabled also with internal delay binding declared and rgmii set as PHY mode. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 165 ++++++++++++++++++++-------------------- drivers/net/dsa/qca8k.h | 10 ++- 2 files changed, 89 insertions(+), 86 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 55118450347f..67999d3619e8 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -888,68 +888,6 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv) return 0; } -static int -qca8k_setup_of_rgmii_delay(struct qca8k_priv *priv) -{ - struct device_node *port_dn; - phy_interface_t mode; - struct dsa_port *dp; - u32 val; - - /* CPU port is already checked */ - dp = dsa_to_port(priv->ds, 0); - - port_dn = dp->dn; - - /* Check if port 0 is set to the correct type */ - of_get_phy_mode(port_dn, &mode); - if (mode != PHY_INTERFACE_MODE_RGMII_ID && - mode != PHY_INTERFACE_MODE_RGMII_RXID && - mode != PHY_INTERFACE_MODE_RGMII_TXID) { - return 0; - } - - switch (mode) { - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - if (of_property_read_u32(port_dn, "rx-internal-delay-ps", &val)) - val = 2; - else - /* Switch regs accept value in ns, convert ps to ns */ - val = val / 1000; - - if (val > QCA8K_MAX_DELAY) { - dev_err(priv->dev, "rgmii rx delay is limited to a max value of 3ns, setting to the max value"); - val = 3; - } - - priv->rgmii_rx_delay = val; - /* Stop here if we need to check only for rx delay */ - if (mode != PHY_INTERFACE_MODE_RGMII_ID) - break; - - fallthrough; - case PHY_INTERFACE_MODE_RGMII_TXID: - if (of_property_read_u32(port_dn, "tx-internal-delay-ps", &val)) - val = 1; - else - /* Switch regs accept value in ns, convert ps to ns */ - val = val / 1000; - - if (val > QCA8K_MAX_DELAY) { - dev_err(priv->dev, "rgmii tx delay is limited to a max value of 3ns, setting to the max value"); - val = 3; - } - - priv->rgmii_tx_delay = val; - break; - default: - return 0; - } - - return 0; -} - static int qca8k_setup_mac_pwr_sel(struct qca8k_priv *priv) { @@ -996,19 +934,21 @@ static int qca8k_find_cpu_port(struct dsa_switch *ds) static int qca8k_parse_port_config(struct qca8k_priv *priv) { + int port, cpu_port_index = 0, ret; struct device_node *port_dn; phy_interface_t mode; struct dsa_port *dp; - int port, ret; + u32 delay; /* We have 2 CPU port. Check them */ - for (port = 0; port < QCA8K_NUM_PORTS; port++) { + for (port = 0; port < QCA8K_NUM_PORTS && cpu_port_index < QCA8K_NUM_CPU_PORTS; port++) { /* Skip every other port */ if (port != 0 && port != 6) continue; dp = dsa_to_port(priv->ds, port); port_dn = dp->dn; + cpu_port_index++; if (!of_device_is_available(port_dn)) continue; @@ -1017,12 +957,54 @@ qca8k_parse_port_config(struct qca8k_priv *priv) if (ret) continue; - if (mode == PHY_INTERFACE_MODE_SGMII) { + switch (mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + delay = 0; + + if (!of_property_read_u32(port_dn, "tx-internal-delay-ps", &delay)) + /* Switch regs accept value in ns, convert ps to ns */ + delay = delay / 1000; + else if (mode == PHY_INTERFACE_MODE_RGMII_ID || + mode == PHY_INTERFACE_MODE_RGMII_TXID) + delay = 1; + + if (delay > QCA8K_MAX_DELAY) { + dev_err(priv->dev, "rgmii tx delay is limited to a max value of 3ns, setting to the max value"); + delay = 3; + } + + priv->rgmii_tx_delay[cpu_port_index] = delay; + + delay = 0; + + if (!of_property_read_u32(port_dn, "rx-internal-delay-ps", &delay)) + /* Switch regs accept value in ns, convert ps to ns */ + delay = delay / 1000; + else if (mode == PHY_INTERFACE_MODE_RGMII_ID || + mode == PHY_INTERFACE_MODE_RGMII_RXID) + delay = 2; + + if (delay > QCA8K_MAX_DELAY) { + dev_err(priv->dev, "rgmii rx delay is limited to a max value of 3ns, setting to the max value"); + delay = 3; + } + + priv->rgmii_rx_delay[cpu_port_index] = delay; + + break; + case PHY_INTERFACE_MODE_SGMII: if (of_property_read_bool(port_dn, "qca,sgmii-txclk-falling-edge")) priv->sgmii_tx_clk_falling_edge = true; if (of_property_read_bool(port_dn, "qca,sgmii-rxclk-falling-edge")) priv->sgmii_rx_clk_falling_edge = true; + + break; + default: + continue; } } @@ -1059,10 +1041,6 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; - ret = qca8k_setup_of_rgmii_delay(priv); - if (ret) - return ret; - ret = qca8k_setup_mac_pwr_sel(priv); if (ret) return ret; @@ -1229,8 +1207,8 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct qca8k_priv *priv = ds->priv; - u32 reg, val; - int ret; + int cpu_port_index, ret; + u32 reg, val, delay; switch (port) { case 0: /* 1st CPU port */ @@ -1242,6 +1220,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, return; reg = QCA8K_REG_PORT0_PAD_CTRL; + cpu_port_index = QCA8K_CPU_PORT0; break; case 1: case 2: @@ -1260,6 +1239,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, return; reg = QCA8K_REG_PORT6_PAD_CTRL; + cpu_port_index = QCA8K_CPU_PORT6; break; default: dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port); @@ -1274,23 +1254,40 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, switch (state->interface) { case PHY_INTERFACE_MODE_RGMII: - /* RGMII mode means no delay so don't enable the delay */ - qca8k_write(priv, reg, QCA8K_PORT_PAD_RGMII_EN); - break; case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - /* RGMII_ID needs internal delay. This is enabled through - * PORT5_PAD_CTRL for all ports, rather than individual port - * registers + val = QCA8K_PORT_PAD_RGMII_EN; + + /* Delay can be declared in 3 different way. + * Mode to rgmii and internal-delay standard binding defined + * rgmii-id or rgmii-tx/rx phy mode set. + * The parse logic set a delay different than 0 only when one + * of the 3 different way is used. In all other case delay is + * not enabled. With ID or TX/RXID delay is enabled and set + * to the default and recommended value. + */ + if (priv->rgmii_tx_delay[cpu_port_index]) { + delay = priv->rgmii_tx_delay[cpu_port_index]; + + val |= QCA8K_PORT_PAD_RGMII_TX_DELAY(delay) | + QCA8K_PORT_PAD_RGMII_TX_DELAY_EN; + } + + if (priv->rgmii_rx_delay[cpu_port_index]) { + delay = priv->rgmii_rx_delay[cpu_port_index]; + + val |= QCA8K_PORT_PAD_RGMII_RX_DELAY(delay) | + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN; + } + + /* Set RGMII delay based on the selected values */ + qca8k_write(priv, reg, val); + + /* QCA8337 requires to set rgmii rx delay for all ports. + * This is enabled through PORT5_PAD_CTRL for all ports, + * rather than individual port registers. */ - qca8k_write(priv, reg, - QCA8K_PORT_PAD_RGMII_EN | - QCA8K_PORT_PAD_RGMII_TX_DELAY(priv->rgmii_tx_delay) | - QCA8K_PORT_PAD_RGMII_RX_DELAY(priv->rgmii_rx_delay) | - QCA8K_PORT_PAD_RGMII_TX_DELAY_EN | - QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); - /* QCA8337 requires to set rgmii rx delay */ if (priv->switch_id == QCA8K_ID_QCA8337) qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 781521e6a965..5eb0c890dfe4 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -13,6 +13,7 @@ #include #define QCA8K_NUM_PORTS 7 +#define QCA8K_NUM_CPU_PORTS 2 #define QCA8K_MAX_MTU 9000 #define PHY_ID_QCA8327 0x004dd034 @@ -255,13 +256,18 @@ struct qca8k_match_data { u8 id; }; +enum { + QCA8K_CPU_PORT0, + QCA8K_CPU_PORT6, +}; + struct qca8k_priv { u8 switch_id; u8 switch_revision; - u8 rgmii_tx_delay; - u8 rgmii_rx_delay; bool sgmii_rx_clk_falling_edge; bool sgmii_tx_clk_falling_edge; + u8 rgmii_rx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ + u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ bool legacy_phy_port_mapping; struct regmap *regmap; struct mii_bus *bus; From 13ad5ccc093ff448b99ac7e138e91e78796adb48 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:12 +0200 Subject: [PATCH 208/440] dt-bindings: net: dsa: qca8k: Document qca,sgmii-enable-pll Document qca,sgmii-enable-pll binding used in the CPU nodes to enable SGMII PLL on MAC config. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index aeb206556f54..05a8ddfb5483 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -45,6 +45,16 @@ A CPU port node has the following optional node: Mostly used in qca8327 with CPU port 0 set to sgmii. - qca,sgmii-txclk-falling-edge: Set the transmit clock phase to falling edge. +- qca,sgmii-enable-pll : For SGMII CPU port, explicitly enable PLL, TX and RX + chain along with Signal Detection. + This should NOT be enabled for qca8327. If enabled with + qca8327 the sgmii port won't correctly init and an err + is printed. + This can be required for qca8337 switch with revision 2. + A warning is displayed when used with revision greater + 2. + With CPU port set to sgmii and qca8337 it is advised + to set this unless a communication problem is observed. For QCA8K the 'fixed-link' sub-node supports only the following properties: From bbc4799e8bb6c397e3b3fec13de68e179f5db9ff Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:13 +0200 Subject: [PATCH 209/440] net: dsa: qca8k: add explicit SGMII PLL enable Support enabling PLL on the SGMII CPU port. Some device require this special configuration or no traffic is transmitted and the switch doesn't work at all. A dedicated binding is added to the CPU node port to apply the correct reg on mac config. Fail to correctly configure sgmii with qca8327 switch and warn if pll is used on qca8337 with a revision greater than 1. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 19 +++++++++++++++++-- drivers/net/dsa/qca8k.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 67999d3619e8..e0a16fe6a298 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1002,6 +1002,18 @@ qca8k_parse_port_config(struct qca8k_priv *priv) if (of_property_read_bool(port_dn, "qca,sgmii-rxclk-falling-edge")) priv->sgmii_rx_clk_falling_edge = true; + if (of_property_read_bool(port_dn, "qca,sgmii-enable-pll")) { + priv->sgmii_enable_pll = true; + + if (priv->switch_id == QCA8K_ID_QCA8327) { + dev_err(priv->dev, "SGMII PLL should NOT be enabled for qca8327. Aborting enabling"); + priv->sgmii_enable_pll = false; + } + + if (priv->switch_revision < 2) + dev_warn(priv->dev, "SGMII PLL should NOT be enabled for qca8337 with revision 2 or more."); + } + break; default: continue; @@ -1312,8 +1324,11 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, if (ret) return; - val |= QCA8K_SGMII_EN_PLL | QCA8K_SGMII_EN_RX | - QCA8K_SGMII_EN_TX | QCA8K_SGMII_EN_SD; + val |= QCA8K_SGMII_EN_SD; + + if (priv->sgmii_enable_pll) + val |= QCA8K_SGMII_EN_PLL | QCA8K_SGMII_EN_RX | + QCA8K_SGMII_EN_TX; if (dsa_is_cpu_port(ds, port)) { /* CPU port, we're talking to the CPU MAC, be a PHY */ diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 5eb0c890dfe4..77b1677edafa 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -266,6 +266,7 @@ struct qca8k_priv { u8 switch_revision; bool sgmii_rx_clk_falling_edge; bool sgmii_tx_clk_falling_edge; + bool sgmii_enable_pll; u8 rgmii_rx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ bool legacy_phy_port_mapping; From 924087c5c3d41553700b0eb83ca2a53b91643dca Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:14 +0200 Subject: [PATCH 210/440] dt-bindings: net: dsa: qca8k: Document qca,led-open-drain binding Document new binding qca,ignore-power-on-sel used to ignore power on strapping and use sw regs instead. Document qca,led-open.drain to set led to open drain mode, the qca,ignore-power-on-sel is mandatory with this enabled or an error will be reported. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 05a8ddfb5483..9e6748ec13da 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -13,6 +13,17 @@ Required properties: Optional properties: - reset-gpios: GPIO to be used to reset the whole device +- qca,ignore-power-on-sel: Ignore power on pin strapping to configure led open + drain or eeprom presence. This is needed for broken + devices that have wrong configuration or when the oem + decided to not use pin strapping and fallback to sw + regs. +- qca,led-open-drain: Set leds to open-drain mode. This requires the + qca,ignore-power-on-sel to be set or the driver will fail + to probe. This is needed if the oem doesn't use pin + strapping to set this mode and prefers to set it using sw + regs. The pin strapping related to led open drain mode is + the pin B68 for QCA832x and B49 for QCA833x Subnodes: From 362bb238d8bf1470424214a8a5968d9c6cce68fa Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:15 +0200 Subject: [PATCH 211/440] net: dsa: qca8k: add support for pws config reg Some qca8327 switch require to force the ignore of power on sel strapping. Some switch require to set the led open drain mode in regs instead of using strapping. While most of the device implements this using the correct way using pin strapping, there are still some broken device that require to be set using sw regs. Introduce a new binding and support these special configuration. As led open drain require to ignore pin strapping to work, the probe fails with EINVAL error with incorrect configuration. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/qca8k.h | 6 ++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index e0a16fe6a298..45a5321e03e8 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -931,6 +931,41 @@ static int qca8k_find_cpu_port(struct dsa_switch *ds) return -EINVAL; } +static int +qca8k_setup_of_pws_reg(struct qca8k_priv *priv) +{ + struct device_node *node = priv->dev->of_node; + u32 val = 0; + int ret; + + /* QCA8327 require to set to the correct mode. + * His bigger brother QCA8328 have the 172 pin layout. + * Should be applied by default but we set this just to make sure. + */ + if (priv->switch_id == QCA8K_ID_QCA8327) { + ret = qca8k_rmw(priv, QCA8K_REG_PWS, QCA8327_PWS_PACKAGE148_EN, + QCA8327_PWS_PACKAGE148_EN); + if (ret) + return ret; + } + + if (of_property_read_bool(node, "qca,ignore-power-on-sel")) + val |= QCA8K_PWS_POWER_ON_SEL; + + if (of_property_read_bool(node, "qca,led-open-drain")) { + if (!(val & QCA8K_PWS_POWER_ON_SEL)) { + dev_err(priv->dev, "qca,led-open-drain require qca,ignore-power-on-sel to be set."); + return -EINVAL; + } + + val |= QCA8K_PWS_LED_OPEN_EN_CSR; + } + + return qca8k_rmw(priv, QCA8K_REG_PWS, + QCA8K_PWS_LED_OPEN_EN_CSR | QCA8K_PWS_POWER_ON_SEL, + val); +} + static int qca8k_parse_port_config(struct qca8k_priv *priv) { @@ -1053,6 +1088,10 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; + ret = qca8k_setup_of_pws_reg(priv); + if (ret) + return ret; + ret = qca8k_setup_mac_pwr_sel(priv); if (ret) return ret; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 77b1677edafa..35a471bfd27f 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -46,6 +46,12 @@ #define QCA8K_MAX_DELAY 3 #define QCA8K_PORT_PAD_SGMII_EN BIT(7) #define QCA8K_REG_PWS 0x010 +#define QCA8K_PWS_POWER_ON_SEL BIT(31) +/* This reg is only valid for QCA832x and toggle the package + * type from 176 pin (by default) to 148 pin used on QCA8327 + */ +#define QCA8327_PWS_PACKAGE148_EN BIT(30) +#define QCA8K_PWS_LED_OPEN_EN_CSR BIT(24) #define QCA8K_PWS_SERDES_AEN_DIS BIT(7) #define QCA8K_REG_MODULE_EN 0x030 #define QCA8K_MODULE_EN_MIB BIT(0) From ed7988d77fbfb79366b68f9e7fa60a6080da23d4 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:16 +0200 Subject: [PATCH 212/440] dt-bindings: net: dsa: qca8k: document support for qca8328 QCA8328 is the bigger brother of qca8327. Document the new compatible binding and add some information to understand the various switch compatible. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 9e6748ec13da..f057117764af 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -3,9 +3,10 @@ Required properties: - compatible: should be one of: - "qca,qca8327" - "qca,qca8334" - "qca,qca8337" + "qca,qca8328": referenced as AR8328(N)-AK1(A/B) QFN 176 pin package + "qca,qca8327": referenced as AR8327(N)-AL1A DR-QFN 148 pin package + "qca,qca8334": referenced as QCA8334-AL3C QFN 88 pin package + "qca,qca8337": referenced as QCA8337N-AL3(B/C) DR-QFN 148 pin package - #size-cells: must be 0 - #address-cells: must be 1 From f477d1c8bdbef4f400718238e350f16f521d2a3e Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:17 +0200 Subject: [PATCH 213/440] net: dsa: qca8k: add support for QCA8328 QCA8328 switch is the bigger brother of the qca8327. Same regs different chip. Change the function to set the correct pin layout and introduce a new match_data to differentiate the 2 switch as they have the same ID and their internal PHY have the same ID. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 19 ++++++++++++++++--- drivers/net/dsa/qca8k.h | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 45a5321e03e8..36365d601c41 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -935,6 +935,7 @@ static int qca8k_setup_of_pws_reg(struct qca8k_priv *priv) { struct device_node *node = priv->dev->of_node; + const struct qca8k_match_data *data; u32 val = 0; int ret; @@ -943,8 +944,14 @@ qca8k_setup_of_pws_reg(struct qca8k_priv *priv) * Should be applied by default but we set this just to make sure. */ if (priv->switch_id == QCA8K_ID_QCA8327) { + data = of_device_get_match_data(priv->dev); + + /* Set the correct package of 148 pin for QCA8327 */ + if (data->reduced_package) + val |= QCA8327_PWS_PACKAGE148_EN; + ret = qca8k_rmw(priv, QCA8K_REG_PWS, QCA8327_PWS_PACKAGE148_EN, - QCA8327_PWS_PACKAGE148_EN); + val); if (ret) return ret; } @@ -2105,7 +2112,12 @@ static int qca8k_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, qca8k_suspend, qca8k_resume); -static const struct qca8k_match_data qca832x = { +static const struct qca8k_match_data qca8327 = { + .id = QCA8K_ID_QCA8327, + .reduced_package = true, +}; + +static const struct qca8k_match_data qca8328 = { .id = QCA8K_ID_QCA8327, }; @@ -2114,7 +2126,8 @@ static const struct qca8k_match_data qca833x = { }; static const struct of_device_id qca8k_of_match[] = { - { .compatible = "qca,qca8327", .data = &qca832x }, + { .compatible = "qca,qca8327", .data = &qca8327 }, + { .compatible = "qca,qca8328", .data = &qca8328 }, { .compatible = "qca,qca8334", .data = &qca833x }, { .compatible = "qca,qca8337", .data = &qca833x }, { /* sentinel */ }, diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 35a471bfd27f..9c115cfe613b 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -260,6 +260,7 @@ struct ar8xxx_port_status { struct qca8k_match_data { u8 id; + bool reduced_package; }; enum { From cef08115846e581f80ff99abf7bf218da1840616 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:18 +0200 Subject: [PATCH 214/440] net: dsa: qca8k: set internal delay also for sgmii QCA original code report port instability and sa that SGMII also require to set internal delay. Generalize the rgmii delay function and apply the advised value if they are not defined in DT. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 88 ++++++++++++++++++++++++++++------------- drivers/net/dsa/qca8k.h | 2 + 2 files changed, 62 insertions(+), 28 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 36365d601c41..296633147f46 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1004,6 +1004,7 @@ qca8k_parse_port_config(struct qca8k_priv *priv) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_SGMII: delay = 0; if (!of_property_read_u32(port_dn, "tx-internal-delay-ps", &delay)) @@ -1036,8 +1037,13 @@ qca8k_parse_port_config(struct qca8k_priv *priv) priv->rgmii_rx_delay[cpu_port_index] = delay; - break; - case PHY_INTERFACE_MODE_SGMII: + /* Skip sgmii parsing for rgmii* mode */ + if (mode == PHY_INTERFACE_MODE_RGMII || + mode == PHY_INTERFACE_MODE_RGMII_ID || + mode == PHY_INTERFACE_MODE_RGMII_TXID || + mode == PHY_INTERFACE_MODE_RGMII_RXID) + break; + if (of_property_read_bool(port_dn, "qca,sgmii-txclk-falling-edge")) priv->sgmii_tx_clk_falling_edge = true; @@ -1260,13 +1266,54 @@ qca8k_setup(struct dsa_switch *ds) return 0; } +static void +qca8k_mac_config_setup_internal_delay(struct qca8k_priv *priv, int cpu_port_index, + u32 reg) +{ + u32 delay, val = 0; + int ret; + + /* Delay can be declared in 3 different way. + * Mode to rgmii and internal-delay standard binding defined + * rgmii-id or rgmii-tx/rx phy mode set. + * The parse logic set a delay different than 0 only when one + * of the 3 different way is used. In all other case delay is + * not enabled. With ID or TX/RXID delay is enabled and set + * to the default and recommended value. + */ + if (priv->rgmii_tx_delay[cpu_port_index]) { + delay = priv->rgmii_tx_delay[cpu_port_index]; + + val |= QCA8K_PORT_PAD_RGMII_TX_DELAY(delay) | + QCA8K_PORT_PAD_RGMII_TX_DELAY_EN; + } + + if (priv->rgmii_rx_delay[cpu_port_index]) { + delay = priv->rgmii_rx_delay[cpu_port_index]; + + val |= QCA8K_PORT_PAD_RGMII_RX_DELAY(delay) | + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN; + } + + /* Set RGMII delay based on the selected values */ + ret = qca8k_rmw(priv, reg, + QCA8K_PORT_PAD_RGMII_TX_DELAY_MASK | + QCA8K_PORT_PAD_RGMII_RX_DELAY_MASK | + QCA8K_PORT_PAD_RGMII_TX_DELAY_EN | + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN, + val); + if (ret) + dev_err(priv->dev, "Failed to set internal delay for CPU port%d", + cpu_port_index == QCA8K_CPU_PORT0 ? 0 : 6); +} + static void qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct qca8k_priv *priv = ds->priv; int cpu_port_index, ret; - u32 reg, val, delay; + u32 reg, val; switch (port) { case 0: /* 1st CPU port */ @@ -1315,32 +1362,10 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = QCA8K_PORT_PAD_RGMII_EN; + qca8k_write(priv, reg, QCA8K_PORT_PAD_RGMII_EN); - /* Delay can be declared in 3 different way. - * Mode to rgmii and internal-delay standard binding defined - * rgmii-id or rgmii-tx/rx phy mode set. - * The parse logic set a delay different than 0 only when one - * of the 3 different way is used. In all other case delay is - * not enabled. With ID or TX/RXID delay is enabled and set - * to the default and recommended value. - */ - if (priv->rgmii_tx_delay[cpu_port_index]) { - delay = priv->rgmii_tx_delay[cpu_port_index]; - - val |= QCA8K_PORT_PAD_RGMII_TX_DELAY(delay) | - QCA8K_PORT_PAD_RGMII_TX_DELAY_EN; - } - - if (priv->rgmii_rx_delay[cpu_port_index]) { - delay = priv->rgmii_rx_delay[cpu_port_index]; - - val |= QCA8K_PORT_PAD_RGMII_RX_DELAY(delay) | - QCA8K_PORT_PAD_RGMII_RX_DELAY_EN; - } - - /* Set RGMII delay based on the selected values */ - qca8k_write(priv, reg, val); + /* Configure rgmii delay */ + qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg); /* QCA8337 requires to set rgmii rx delay for all ports. * This is enabled through PORT5_PAD_CTRL for all ports, @@ -1411,6 +1436,13 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE | QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE, val); + + /* From original code is reported port instability as SGMII also + * require delay set. Apply advised values here or take them from DT. + */ + if (state->interface == PHY_INTERFACE_MODE_SGMII) + qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg); + break; default: dev_err(ds->dev, "xMII mode %s not supported for port %d\n", diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 9c115cfe613b..c5ca6277b45b 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -39,7 +39,9 @@ #define QCA8K_REG_PORT5_PAD_CTRL 0x008 #define QCA8K_REG_PORT6_PAD_CTRL 0x00c #define QCA8K_PORT_PAD_RGMII_EN BIT(26) +#define QCA8K_PORT_PAD_RGMII_TX_DELAY_MASK GENMASK(23, 22) #define QCA8K_PORT_PAD_RGMII_TX_DELAY(x) ((x) << 22) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY_MASK GENMASK(21, 20) #define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) ((x) << 20) #define QCA8K_PORT_PAD_RGMII_TX_DELAY_EN BIT(25) #define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) From fd0bb28c547f7c8affb1691128cece38f5b626a1 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:19 +0200 Subject: [PATCH 215/440] net: dsa: qca8k: move port config to dedicated struct Move ports related config to dedicated struct to keep things organized. Signed-off-by: Ansuel Smith Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 26 +++++++++++++------------- drivers/net/dsa/qca8k.h | 10 +++++++--- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 296633147f46..bf58d4d1ae9b 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1019,7 +1019,7 @@ qca8k_parse_port_config(struct qca8k_priv *priv) delay = 3; } - priv->rgmii_tx_delay[cpu_port_index] = delay; + priv->ports_config.rgmii_tx_delay[cpu_port_index] = delay; delay = 0; @@ -1035,7 +1035,7 @@ qca8k_parse_port_config(struct qca8k_priv *priv) delay = 3; } - priv->rgmii_rx_delay[cpu_port_index] = delay; + priv->ports_config.rgmii_rx_delay[cpu_port_index] = delay; /* Skip sgmii parsing for rgmii* mode */ if (mode == PHY_INTERFACE_MODE_RGMII || @@ -1045,17 +1045,17 @@ qca8k_parse_port_config(struct qca8k_priv *priv) break; if (of_property_read_bool(port_dn, "qca,sgmii-txclk-falling-edge")) - priv->sgmii_tx_clk_falling_edge = true; + priv->ports_config.sgmii_tx_clk_falling_edge = true; if (of_property_read_bool(port_dn, "qca,sgmii-rxclk-falling-edge")) - priv->sgmii_rx_clk_falling_edge = true; + priv->ports_config.sgmii_rx_clk_falling_edge = true; if (of_property_read_bool(port_dn, "qca,sgmii-enable-pll")) { - priv->sgmii_enable_pll = true; + priv->ports_config.sgmii_enable_pll = true; if (priv->switch_id == QCA8K_ID_QCA8327) { dev_err(priv->dev, "SGMII PLL should NOT be enabled for qca8327. Aborting enabling"); - priv->sgmii_enable_pll = false; + priv->ports_config.sgmii_enable_pll = false; } if (priv->switch_revision < 2) @@ -1281,15 +1281,15 @@ qca8k_mac_config_setup_internal_delay(struct qca8k_priv *priv, int cpu_port_inde * not enabled. With ID or TX/RXID delay is enabled and set * to the default and recommended value. */ - if (priv->rgmii_tx_delay[cpu_port_index]) { - delay = priv->rgmii_tx_delay[cpu_port_index]; + if (priv->ports_config.rgmii_tx_delay[cpu_port_index]) { + delay = priv->ports_config.rgmii_tx_delay[cpu_port_index]; val |= QCA8K_PORT_PAD_RGMII_TX_DELAY(delay) | QCA8K_PORT_PAD_RGMII_TX_DELAY_EN; } - if (priv->rgmii_rx_delay[cpu_port_index]) { - delay = priv->rgmii_rx_delay[cpu_port_index]; + if (priv->ports_config.rgmii_rx_delay[cpu_port_index]) { + delay = priv->ports_config.rgmii_rx_delay[cpu_port_index]; val |= QCA8K_PORT_PAD_RGMII_RX_DELAY(delay) | QCA8K_PORT_PAD_RGMII_RX_DELAY_EN; @@ -1397,7 +1397,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, val |= QCA8K_SGMII_EN_SD; - if (priv->sgmii_enable_pll) + if (priv->ports_config.sgmii_enable_pll) val |= QCA8K_SGMII_EN_PLL | QCA8K_SGMII_EN_RX | QCA8K_SGMII_EN_TX; @@ -1425,10 +1425,10 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, val = 0; /* SGMII Clock phase configuration */ - if (priv->sgmii_rx_clk_falling_edge) + if (priv->ports_config.sgmii_rx_clk_falling_edge) val |= QCA8K_PORT0_PAD_SGMII_RXCLK_FALLING_EDGE; - if (priv->sgmii_tx_clk_falling_edge) + if (priv->ports_config.sgmii_tx_clk_falling_edge) val |= QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE; if (val) diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index c5ca6277b45b..e10571a398c9 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -270,15 +270,19 @@ enum { QCA8K_CPU_PORT6, }; -struct qca8k_priv { - u8 switch_id; - u8 switch_revision; +struct qca8k_ports_config { bool sgmii_rx_clk_falling_edge; bool sgmii_tx_clk_falling_edge; bool sgmii_enable_pll; u8 rgmii_rx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */ +}; + +struct qca8k_priv { + u8 switch_id; + u8 switch_revision; bool legacy_phy_port_mapping; + struct qca8k_ports_config ports_config; struct regmap *regmap; struct mii_bus *bus; struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; From e52073a8e3086046a098b8a7cbeb282ff0cdb424 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 14 Oct 2021 00:39:20 +0200 Subject: [PATCH 216/440] dt-bindings: net: ipq8064-mdio: fix warning with new qca8k switch Fix warning now that we have qca8k switch Documentation using yaml. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml index 948677ade6d1..d7748dd33199 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml @@ -51,6 +51,9 @@ examples: switch@10 { compatible = "qca,qca8337"; reg = <0x10>; - /* ... */ + + ports { + /* ... */ + }; }; }; From d291fbb8245d5ba04979fed85575860a5cea7196 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Thu, 14 Oct 2021 00:39:21 +0200 Subject: [PATCH 217/440] dt-bindings: net: dsa: qca8k: convert to YAML schema Convert the qca8k bindings to YAML format. Signed-off-by: Matthew Hagan Co-developed-by: Ansuel Smith Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 245 ------------ .../devicetree/bindings/net/dsa/qca8k.yaml | 362 ++++++++++++++++++ 2 files changed, 362 insertions(+), 245 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.txt create mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.yaml diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt deleted file mode 100644 index f057117764af..000000000000 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ /dev/null @@ -1,245 +0,0 @@ -* Qualcomm Atheros QCA8xxx switch family - -Required properties: - -- compatible: should be one of: - "qca,qca8328": referenced as AR8328(N)-AK1(A/B) QFN 176 pin package - "qca,qca8327": referenced as AR8327(N)-AL1A DR-QFN 148 pin package - "qca,qca8334": referenced as QCA8334-AL3C QFN 88 pin package - "qca,qca8337": referenced as QCA8337N-AL3(B/C) DR-QFN 148 pin package - -- #size-cells: must be 0 -- #address-cells: must be 1 - -Optional properties: - -- reset-gpios: GPIO to be used to reset the whole device -- qca,ignore-power-on-sel: Ignore power on pin strapping to configure led open - drain or eeprom presence. This is needed for broken - devices that have wrong configuration or when the oem - decided to not use pin strapping and fallback to sw - regs. -- qca,led-open-drain: Set leds to open-drain mode. This requires the - qca,ignore-power-on-sel to be set or the driver will fail - to probe. This is needed if the oem doesn't use pin - strapping to set this mode and prefers to set it using sw - regs. The pin strapping related to led open drain mode is - the pin B68 for QCA832x and B49 for QCA833x - -Subnodes: - -The integrated switch subnode should be specified according to the binding -described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external -mdio-bus each subnode describing a port needs to have a valid phandle -referencing the internal PHY it is connected to. This is because there's no -N:N mapping of port and PHY id. -To declare the internal mdio-bus configuration, declare a mdio node in the -switch node and declare the phandle for the port referencing the internal -PHY is connected to. In this config a internal mdio-bus is registered and -the mdio MASTER is used as communication. - -Don't use mixed external and internal mdio-bus configurations, as this is -not supported by the hardware. - -This switch support 2 CPU port. Normally and advised configuration is with -CPU port set to port 0. It is also possible to set the CPU port to port 6 -if the device requires it. The driver will configure the switch to the defined -port. With both CPU port declared the first CPU port is selected as primary -and the secondary CPU ignored. - -A CPU port node has the following optional node: - -- fixed-link : Fixed-link subnode describing a link to a non-MDIO - managed entity. See - Documentation/devicetree/bindings/net/fixed-link.txt - for details. -- qca,sgmii-rxclk-falling-edge: Set the receive clock phase to falling edge. - Mostly used in qca8327 with CPU port 0 set to - sgmii. -- qca,sgmii-txclk-falling-edge: Set the transmit clock phase to falling edge. -- qca,sgmii-enable-pll : For SGMII CPU port, explicitly enable PLL, TX and RX - chain along with Signal Detection. - This should NOT be enabled for qca8327. If enabled with - qca8327 the sgmii port won't correctly init and an err - is printed. - This can be required for qca8337 switch with revision 2. - A warning is displayed when used with revision greater - 2. - With CPU port set to sgmii and qca8337 it is advised - to set this unless a communication problem is observed. - -For QCA8K the 'fixed-link' sub-node supports only the following properties: - -- 'speed' (integer, mandatory), to indicate the link speed. Accepted - values are 10, 100 and 1000 -- 'full-duplex' (boolean, optional), to indicate that full duplex is - used. When absent, half duplex is assumed. - -Examples: - -for the external mdio-bus configuration: - - &mdio0 { - phy_port1: phy@0 { - reg = <0>; - }; - - phy_port2: phy@1 { - reg = <1>; - }; - - phy_port3: phy@2 { - reg = <2>; - }; - - phy_port4: phy@3 { - reg = <3>; - }; - - phy_port5: phy@4 { - reg = <4>; - }; - - switch@10 { - compatible = "qca,qca8337"; - #address-cells = <1>; - #size-cells = <0>; - - reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>; - reg = <0x10>; - - ports { - #address-cells = <1>; - #size-cells = <0>; - port@0 { - reg = <0>; - label = "cpu"; - ethernet = <&gmac1>; - phy-mode = "rgmii"; - fixed-link { - speed = 1000; - full-duplex; - }; - }; - - port@1 { - reg = <1>; - label = "lan1"; - phy-handle = <&phy_port1>; - }; - - port@2 { - reg = <2>; - label = "lan2"; - phy-handle = <&phy_port2>; - }; - - port@3 { - reg = <3>; - label = "lan3"; - phy-handle = <&phy_port3>; - }; - - port@4 { - reg = <4>; - label = "lan4"; - phy-handle = <&phy_port4>; - }; - - port@5 { - reg = <5>; - label = "wan"; - phy-handle = <&phy_port5>; - }; - }; - }; - }; - -for the internal master mdio-bus configuration: - - &mdio0 { - switch@10 { - compatible = "qca,qca8337"; - #address-cells = <1>; - #size-cells = <0>; - - reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>; - reg = <0x10>; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - label = "cpu"; - ethernet = <&gmac1>; - phy-mode = "rgmii"; - fixed-link { - speed = 1000; - full-duplex; - }; - }; - - port@1 { - reg = <1>; - label = "lan1"; - phy-mode = "internal"; - phy-handle = <&phy_port1>; - }; - - port@2 { - reg = <2>; - label = "lan2"; - phy-mode = "internal"; - phy-handle = <&phy_port2>; - }; - - port@3 { - reg = <3>; - label = "lan3"; - phy-mode = "internal"; - phy-handle = <&phy_port3>; - }; - - port@4 { - reg = <4>; - label = "lan4"; - phy-mode = "internal"; - phy-handle = <&phy_port4>; - }; - - port@5 { - reg = <5>; - label = "wan"; - phy-mode = "internal"; - phy-handle = <&phy_port5>; - }; - }; - - mdio { - #address-cells = <1>; - #size-cells = <0>; - - phy_port1: phy@0 { - reg = <0>; - }; - - phy_port2: phy@1 { - reg = <1>; - }; - - phy_port3: phy@2 { - reg = <2>; - }; - - phy_port4: phy@3 { - reg = <3>; - }; - - phy_port5: phy@4 { - reg = <4>; - }; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml new file mode 100644 index 000000000000..48de0ace265d --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml @@ -0,0 +1,362 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/qca8k.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Atheros QCA83xx switch family + +maintainers: + - John Crispin + +description: + If the QCA8K switch is connect to an SoC's external mdio-bus, each subnode + describing a port needs to have a valid phandle referencing the internal PHY + it is connected to. This is because there is no N:N mapping of port and PHY + ID. To declare the internal mdio-bus configuration, declare an MDIO node in + the switch node and declare the phandle for the port, referencing the internal + PHY it is connected to. In this config, an internal mdio-bus is registered and + the MDIO master is used for communication. Mixed external and internal + mdio-bus configurations are not supported by the hardware. + +properties: + compatible: + oneOf: + - enum: + - qca,qca8327 + - qca,qca8328 + - qca,qca8334 + - qca,qca8337 + description: | + qca,qca8328: referenced as AR8328(N)-AK1(A/B) QFN 176 pin package + qca,qca8327: referenced as AR8327(N)-AL1A DR-QFN 148 pin package + qca,qca8334: referenced as QCA8334-AL3C QFN 88 pin package + qca,qca8337: referenced as QCA8337N-AL3(B/C) DR-QFN 148 pin package + + reg: + maxItems: 1 + + reset-gpios: + description: + GPIO to be used to reset the whole device + maxItems: 1 + + qca,ignore-power-on-sel: + $ref: /schemas/types.yaml#/definitions/flag + description: + Ignore power-on pin strapping to configure LED open-drain or EEPROM + presence. This is needed for devices with incorrect configuration or when + the OEM has decided not to use pin strapping and falls back to SW regs. + + qca,led-open-drain: + $ref: /schemas/types.yaml#/definitions/flag + description: + Set LEDs to open-drain mode. This requires the qca,ignore-power-on-sel to + be set, otherwise the driver will fail at probe. This is required if the + OEM does not use pin strapping to set this mode and prefers to set it + using SW regs. The pin strappings related to LED open-drain mode are + B68 on the QCA832x and B49 on the QCA833x. + + mdio: + type: object + description: Qca8k switch have an internal mdio to access switch port. + If this is not present, the legacy mapping is used and the + internal mdio access is used. + With the legacy mapping the reg corresponding to the internal + mdio is the switch reg with an offset of -1. + + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + patternProperties: + "^(ethernet-)?phy@[0-4]$": + type: object + + allOf: + - $ref: "http://devicetree.org/schemas/net/mdio.yaml#" + + properties: + reg: + maxItems: 1 + + required: + - reg + +patternProperties: + "^(ethernet-)?ports$": + type: object + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + patternProperties: + "^(ethernet-)?port@[0-6]$": + type: object + description: Ethernet switch ports + + properties: + reg: + description: Port number + + label: + description: + Describes the label associated with this port, which will become + the netdev name + $ref: /schemas/types.yaml#/definitions/string + + link: + description: + Should be a list of phandles to other switch's DSA port. This + port is used as the outgoing port towards the phandle ports. The + full routing information must be given, not just the one hop + routes to neighbouring switches + $ref: /schemas/types.yaml#/definitions/phandle-array + + ethernet: + description: + Should be a phandle to a valid Ethernet device node. This host + device is what the switch port is connected to + $ref: /schemas/types.yaml#/definitions/phandle + + phy-handle: true + + phy-mode: true + + fixed-link: true + + mac-address: true + + sfp: true + + qca,sgmii-rxclk-falling-edge: + $ref: /schemas/types.yaml#/definitions/flag + description: + Set the receive clock phase to falling edge. Mostly commonly used on + the QCA8327 with CPU port 0 set to SGMII. + + qca,sgmii-txclk-falling-edge: + $ref: /schemas/types.yaml#/definitions/flag + description: + Set the transmit clock phase to falling edge. + + qca,sgmii-enable-pll: + $ref: /schemas/types.yaml#/definitions/flag + description: + For SGMII CPU port, explicitly enable PLL, TX and RX chain along with + Signal Detection. On the QCA8327 this should not be enabled, otherwise + the SGMII port will not initialize. When used on the QCA8337, revision 3 + or greater, a warning will be displayed. When the CPU port is set to + SGMII on the QCA8337, it is advised to set this unless a communication + issue is observed. + + required: + - reg + + additionalProperties: false + +oneOf: + - required: + - ports + - required: + - ethernet-ports + +required: + - compatible + - reg + +additionalProperties: true + +examples: + - | + #include + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + external_phy_port1: ethernet-phy@0 { + reg = <0>; + }; + + external_phy_port2: ethernet-phy@1 { + reg = <1>; + }; + + external_phy_port3: ethernet-phy@2 { + reg = <2>; + }; + + external_phy_port4: ethernet-phy@3 { + reg = <3>; + }; + + external_phy_port5: ethernet-phy@4 { + reg = <4>; + }; + + switch@10 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>; + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&external_phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&external_phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&external_phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-handle = <&external_phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-handle = <&external_phy_port5>; + }; + }; + }; + }; + - | + #include + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch@10 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>; + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-mode = "internal"; + phy-handle = <&internal_phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-mode = "internal"; + phy-handle = <&internal_phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-mode = "internal"; + phy-handle = <&internal_phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-mode = "internal"; + phy-handle = <&internal_phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-mode = "internal"; + phy-handle = <&internal_phy_port5>; + }; + + port@6 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "sgmii"; + + qca,sgmii-rxclk-falling-edge; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + internal_phy_port1: ethernet-phy@0 { + reg = <0>; + }; + + internal_phy_port2: ethernet-phy@1 { + reg = <1>; + }; + + internal_phy_port3: ethernet-phy@2 { + reg = <2>; + }; + + internal_phy_port4: ethernet-phy@3 { + reg = <3>; + }; + + internal_phy_port5: ethernet-phy@4 { + reg = <4>; + }; + }; + }; + }; From 0b93aed2842d950e8d2625e975e5a57febeff33d Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 14 Oct 2021 16:10:50 +0800 Subject: [PATCH 218/440] mctp: Avoid leak of mctp_sk_key mctp_key_alloc() returns a key already referenced. The mctp_route_input() path receives a packet for a bind socket and allocates a key. It passes the key to mctp_key_add() which takes a refcount and adds the key to lists. mctp_route_input() should then release its own refcount when setting the key pointer to NULL. In the mctp_alloc_local_tag() path (for mctp_local_output()) we similarly need to unref the key before returning (mctp_reserve_tag() takes a refcount and adds the key to lists). Fixes: 73c618456dc5 ("mctp: locking, lifetime and validity changes for sk_keys") Signed-off-by: Matt Johnston Reviewed-by: Jeremy Kerr Signed-off-by: David S. Miller --- net/mctp/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mctp/route.c b/net/mctp/route.c index 04781459b2be..82fb5ae524f6 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -372,6 +372,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) trace_mctp_key_acquire(key); /* we don't need to release key->lock on exit */ + mctp_key_unref(key); key = NULL; } else { @@ -584,6 +585,9 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk, trace_mctp_key_acquire(key); *tagp = key->tag; + /* done with the key in this scope */ + mctp_key_unref(key); + key = NULL; rc = 0; } From 19757cebf0c5016a1f36f7fe9810a9f0b33c0832 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 06:41:26 -0700 Subject: [PATCH 219/440] tcp: switch orphan_count to bare per-cpu counters Use of percpu_counter structure to track count of orphaned sockets is causing problems on modern hosts with 256 cpus or more. Stefan Bach reported a serious spinlock contention in real workloads, that I was able to reproduce with a netfilter rule dropping incoming FIN packets. 53.56% server [kernel.kallsyms] [k] queued_spin_lock_slowpath | ---queued_spin_lock_slowpath | --53.51%--_raw_spin_lock_irqsave | --53.51%--__percpu_counter_sum tcp_check_oom | |--39.03%--__tcp_close | tcp_close | inet_release | inet6_release | sock_close | __fput | ____fput | task_work_run | exit_to_usermode_loop | do_syscall_64 | entry_SYSCALL_64_after_hwframe | __GI___libc_close | --14.48%--tcp_out_of_resources tcp_write_timeout tcp_retransmit_timer tcp_write_timer_handler tcp_write_timer call_timer_fn expire_timers __run_timers run_timer_softirq __softirqentry_text_start As explained in commit cf86a086a180 ("net/dst: use a smaller percpu_counter batch for dst entries accounting"), default batch size is too big for the default value of tcp_max_orphans (262144). But even if we reduce batch sizes, there would still be cases where the estimated count of orphans is beyond the limit, and where tcp_too_many_orphans() has to call the expensive percpu_counter_sum_positive(). One solution is to use plain per-cpu counters, and have a timer to periodically refresh this cache. Updating this cache every 100ms seems about right, tcp pressure state is not radically changing over shorter periods. percpu_counter was nice 15 years ago while hosts had less than 16 cpus, not anymore by current standards. v2: Fix the build issue for CONFIG_CRYPTO_DEV_CHELSIO_TLS=m, reported by kernel test robot Remove unused socket argument from tcp_too_many_orphans() Fixes: dd24c00191d5 ("net: Use a percpu_counter for orphan_count") Signed-off-by: Eric Dumazet Reported-by: Stefan Bach Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- .../chelsio/inline_crypto/chtls/chtls_cm.h | 2 +- include/net/inet_connection_sock.h | 2 +- include/net/sock.h | 2 +- include/net/tcp.h | 17 ++------- net/dccp/dccp.h | 2 +- net/dccp/proto.c | 14 ++----- net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/proc.c | 2 +- net/ipv4/tcp.c | 38 ++++++++++++++++--- 11 files changed, 49 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index bcad69c48074..4af5561cbfc5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent) * created only after 3 way handshake is done. */ sock_orphan(child); - percpu_counter_inc((child)->sk_prot->orphan_count); + INC_ORPHAN_COUNT(child); chtls_release_resources(child); chtls_conn_done(child); } else { diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index b1161bdeda4d..f61ca657601c 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -95,7 +95,7 @@ struct deferred_skb_cb { #define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) #define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) #define SACK_OK(tp) ((tp)->rx_opt.sack_ok) -#define INC_ORPHAN_COUNT(sk) percpu_counter_inc((sk)->sk_prot->orphan_count) +#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count) /* TLS SKB */ #define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b06c2d02ec84..fa6a87246a7b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,7 +289,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ sock_set_flag(sk, SOCK_DEAD); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); } void inet_csk_destroy_sock(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index d08ab55fa4a0..596ba85611bc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1235,7 +1235,7 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - struct percpu_counter *orphan_count; + unsigned int __percpu *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c2898ac6569..af77e6453b1b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -48,7 +48,9 @@ extern struct inet_hashinfo tcp_hashinfo; -extern struct percpu_counter tcp_orphan_count; +DECLARE_PER_CPU(unsigned int, tcp_orphan_count); +int tcp_orphan_count_sum(void); + void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -290,19 +292,6 @@ static inline bool tcp_out_of_memory(struct sock *sk) void sk_forced_mem_schedule(struct sock *sk, int size); -static inline bool tcp_too_many_orphans(struct sock *sk, int shift) -{ - struct percpu_counter *ocp = sk->sk_prot->orphan_count; - int orphans = percpu_counter_read_positive(ocp); - - if (orphans << shift > sysctl_tcp_max_orphans) { - orphans = percpu_counter_sum_positive(ocp); - if (orphans << shift > sysctl_tcp_max_orphans) - return true; - } - return false; -} - bool tcp_check_oom(struct sock *sk, int shift); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index c5c1d2b8045e..5183e627468d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -48,7 +48,7 @@ extern bool dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern struct percpu_counter dccp_orphan_count; +DECLARE_PER_CPU(unsigned int, dccp_orphan_count); void dccp_time_wait(struct sock *sk, int state, int timeo); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index abb5c596a817..fc44dadc778b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -42,8 +42,8 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -struct percpu_counter dccp_orphan_count; -EXPORT_SYMBOL_GPL(dccp_orphan_count); +DEFINE_PER_CPU(unsigned int, dccp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); @@ -1055,7 +1055,7 @@ adjudge_to_death: bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(dccp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) @@ -1115,13 +1115,10 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > sizeof_field(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); - if (rc) - goto out_fail; inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) - goto out_free_percpu; + goto out_fail; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", @@ -1226,8 +1223,6 @@ out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); -out_free_percpu: - percpu_counter_destroy(&dccp_orphan_count); out_fail: dccp_hashinfo.bhash = NULL; dccp_hashinfo.ehash = NULL; @@ -1250,7 +1245,6 @@ static void __exit dccp_fini(void) dccp_ackvec_exit(); dccp_sysctl_exit(); inet_hashinfo2_free_mod(&dccp_hashinfo); - percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f25d02ad4a8a..f7fea3a7c5e6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1015,7 +1015,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - percpu_counter_dec(sk->sk_prot->orphan_count); + this_cpu_dec(*sk->sk_prot->orphan_count); sock_put(sk); } @@ -1074,7 +1074,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, sock_orphan(child); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bfb522e51346..75737267746f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -598,7 +598,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b0d3a09dc84e..f30273afb539 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -53,7 +53,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; int orphans, sockets; - orphans = percpu_counter_sum_positive(&tcp_orphan_count); + orphans = tcp_orphan_count_sum(); sockets = proto_sockets_allocated_sum_positive(&tcp_prot); socket_seq_show(seq); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 414c179c28e0..c2d9830136d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -287,8 +287,8 @@ enum { TCP_CMSG_TS = 2 }; -struct percpu_counter tcp_orphan_count; -EXPORT_SYMBOL_GPL(tcp_orphan_count); +DEFINE_PER_CPU(unsigned int, tcp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); @@ -2673,11 +2673,36 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); +int tcp_orphan_count_sum(void) +{ + int i, total = 0; + + for_each_possible_cpu(i) + total += per_cpu(tcp_orphan_count, i); + + return max(total, 0); +} + +static int tcp_orphan_cache; +static struct timer_list tcp_orphan_timer; +#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100) + +static void tcp_orphan_update(struct timer_list *unused) +{ + WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum()); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); +} + +static bool tcp_too_many_orphans(int shift) +{ + return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; +} + bool tcp_check_oom(struct sock *sk, int shift) { bool too_many_orphans, out_of_socket_memory; - too_many_orphans = tcp_too_many_orphans(sk, shift); + too_many_orphans = tcp_too_many_orphans(shift); out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans) @@ -2786,7 +2811,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(tcp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -4479,7 +4504,10 @@ void __init tcp_init(void) sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); - percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + + timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); + inet_hashinfo_init(&tcp_hashinfo); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ From 70e939ddea7f014b94fe001db65c3efc986e4add Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 10:59:17 -0700 Subject: [PATCH 220/440] net: add skb_get_dsfield() helper skb_get_dsfield(skb) gets dsfield from skb, or -1 if an error was found. This is basically a wrapper around ipv4_get_dsfield() and ipv6_get_dsfield(). Used by following patch for fq_codel. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Ingemar Johansson S Cc: Tom Henderson Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ba77f47ef61e..ea32393464a2 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -188,6 +188,23 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) return 0; } +static inline int skb_get_dsfield(struct sk_buff *skb) +{ + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + break; + return ipv4_get_dsfield(ip_hdr(skb)); + + case cpu_to_be16(ETH_P_IPV6): + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + break; + return ipv6_get_dsfield(ipv6_hdr(skb)); + } + + return -1; +} + static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { From e72aeb9ee0e34c57dc90793d0bf82cab9624d64e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 10:59:18 -0700 Subject: [PATCH 221/440] fq_codel: implement L4S style ce_threshold_ect1 marking Add TCA_FQ_CODEL_CE_THRESHOLD_ECT1 boolean option to select Low Latency, Low Loss, Scalable Throughput (L4S) style marking, along with ce_threshold. If enabled, only packets with ECT(1) can be transformed to CE if their sojourn time is above the ce_threshold. Note that this new option does not change rules for codel law. In particular, if TCA_FQ_CODEL_ECN is left enabled (this is the default when fq_codel qdisc is created), ECT(0) packets can still get CE if codel law (as governed by limit/target) decides so. Section 4.3.b of current draft [1] states: b. A scheduler with per-flow queues such as FQ-CoDel or FQ-PIE can be used for L4S. For instance within each queue of an FQ-CoDel system, as well as a CoDel AQM, there is typically also ECN marking at an immediate (unsmoothed) shallow threshold to support use in data centres (see Sec.5.2.7 of [RFC8290]). This can be modified so that the shallow threshold is solely applied to ECT(1) packets. Then if there is a flow of non-ECN or ECT(0) packets in the per-flow-queue, the Classic AQM (e.g. CoDel) is applied; while if there is a flow of ECT(1) packets in the queue, the shallower (typically sub-millisecond) threshold is applied. Tested: tc qd replace dev eth1 root fq_codel ce_threshold_ect1 50usec netperf ... -t TCP_STREAM -- K dctcp tc -s -d qd sh dev eth1 qdisc fq_codel 8022: root refcnt 32 limit 10240p flows 1024 quantum 9212 target 5ms ce_threshold_ect1 49us interval 100ms memory_limit 32Mb ecn drop_batch 64 Sent 14388596616 bytes 9543449 pkt (dropped 0, overlimits 0 requeues 152013) backlog 0b 0p requeues 152013 maxpacket 68130 drop_overlimit 0 new_flow_count 95678 ecn_mark 0 ce_mark 7639 new_flows_len 0 old_flows_len 0 [1] L4S current draft: https://datatracker.ietf.org/doc/html/draft-ietf-tsvwg-l4s-arch Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Ingemar Johansson S Cc: Tom Henderson Cc: Bob Briscoe Signed-off-by: David S. Miller --- include/net/codel.h | 2 ++ include/net/codel_impl.h | 18 +++++++++++++++--- include/uapi/linux/pkt_sched.h | 1 + net/mac80211/sta_info.c | 1 + net/sched/sch_fq_codel.c | 15 +++++++++++---- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index a6e428f80135..5e8b181b76b8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -102,6 +102,7 @@ static inline u32 codel_time_to_us(codel_time_t val) * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. * @ecn: is Explicit Congestion Notification enabled + * @ce_threshold_ect1: if ce_threshold only marks ECT(1) packets */ struct codel_params { codel_time_t target; @@ -109,6 +110,7 @@ struct codel_params { codel_time_t interval; u32 mtu; bool ecn; + bool ce_threshold_ect1; }; /** diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index d289b91dcd65..7af2c3eb3c43 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -54,6 +54,7 @@ static void codel_params_init(struct codel_params *params) params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; + params->ce_threshold_ect1 = false; params->ecn = false; } @@ -246,9 +247,20 @@ static struct sk_buff *codel_dequeue(void *ctx, vars->rec_inv_sqrt); } end: - if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && - INET_ECN_set_ce(skb)) - stats->ce_mark++; + if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { + bool set_ce = true; + + if (params->ce_threshold_ect1) { + /* Note: if skb_get_dsfield() returns -1, following + * gives INET_ECN_MASK, which is != INET_ECN_ECT_1. + */ + u8 ecn = skb_get_dsfield(skb) & INET_ECN_MASK; + + set_ce = (ecn == INET_ECN_ECT_1); + } + if (set_ce && INET_ECN_set_ce(skb)) + stats->ce_mark++; + } return skb; } diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ec88590b3198..6be9a84cccfa 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -840,6 +840,7 @@ enum { TCA_FQ_CODEL_CE_THRESHOLD, TCA_FQ_CODEL_DROP_BATCH_SIZE, TCA_FQ_CODEL_MEMORY_LIMIT, + TCA_FQ_CODEL_CE_THRESHOLD_ECT1, __TCA_FQ_CODEL_MAX }; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2b5acb37587f..a39830418434 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -513,6 +513,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; + sta->cparams.ce_threshold_ect1 = false; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index bb0cd6d3d2c2..033d65d06eb1 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -362,6 +362,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD_ECT1] = { .type = NLA_U8 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, @@ -408,6 +409,9 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]) + q->cparams.ce_threshold_ect1 = !!nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]); + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -544,10 +548,13 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; - if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && - nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, - codel_time_to_us(q->cparams.ce_threshold))) - goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) { + if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_ECT1, q->cparams.ce_threshold_ect1)) + goto nla_put_failure; + } return nla_nest_end(skb, opts); From 2cf0b6fe9bd3c05b499b26ba871651d7860c10f4 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 Oct 2021 12:01:23 +0300 Subject: [PATCH 222/440] soc: fsl: dpio: extract the QBMAN clock frequency from the attributes Through the dpio_get_attributes() firmware call the dpio driver has access to the QBMAN clock frequency. Extend the structure which holds the firmware's response so that we can have access to this information. This will be needed in the next patches which also add support for interrupt coalescing which needs to be configured based on the frequency. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/soc/fsl/dpio/dpio-cmd.h | 3 +++ drivers/soc/fsl/dpio/dpio-driver.c | 1 + drivers/soc/fsl/dpio/dpio-service.c | 1 + drivers/soc/fsl/dpio/dpio.c | 1 + drivers/soc/fsl/dpio/dpio.h | 2 ++ drivers/soc/fsl/dpio/qbman-portal.h | 1 + include/soc/fsl/dpaa2-io.h | 2 ++ 7 files changed, 11 insertions(+) diff --git a/drivers/soc/fsl/dpio/dpio-cmd.h b/drivers/soc/fsl/dpio/dpio-cmd.h index e13fd3ac1939..2fbcb78cdaaf 100644 --- a/drivers/soc/fsl/dpio/dpio-cmd.h +++ b/drivers/soc/fsl/dpio/dpio-cmd.h @@ -46,6 +46,9 @@ struct dpio_rsp_get_attr { __le64 qbman_portal_ci_addr; /* cmd word 3 */ __le32 qbman_version; + __le32 pad1; + /* cmd word 4 */ + __le32 clk; }; struct dpio_stashing_dest { diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c index 7f397b4ad878..dd948889eeab 100644 --- a/drivers/soc/fsl/dpio/dpio-driver.c +++ b/drivers/soc/fsl/dpio/dpio-driver.c @@ -162,6 +162,7 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev) goto err_get_attr; } desc.qman_version = dpio_attrs.qbman_version; + desc.qman_clk = dpio_attrs.clk; err = dpio_enable(dpio_dev->mc_io, 0, dpio_dev->mc_handle); if (err) { diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 7351f3030550..2acbb96c5e45 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -127,6 +127,7 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, obj->dpio_desc = *desc; obj->swp_desc.cena_bar = obj->dpio_desc.regs_cena; obj->swp_desc.cinh_bar = obj->dpio_desc.regs_cinh; + obj->swp_desc.qman_clk = obj->dpio_desc.qman_clk; obj->swp_desc.qman_version = obj->dpio_desc.qman_version; obj->swp = qbman_swp_init(&obj->swp_desc); diff --git a/drivers/soc/fsl/dpio/dpio.c b/drivers/soc/fsl/dpio/dpio.c index af74c597a675..8ed606ffaac5 100644 --- a/drivers/soc/fsl/dpio/dpio.c +++ b/drivers/soc/fsl/dpio/dpio.c @@ -162,6 +162,7 @@ int dpio_get_attributes(struct fsl_mc_io *mc_io, attr->qbman_portal_ci_offset = le64_to_cpu(dpio_rsp->qbman_portal_ci_addr); attr->qbman_version = le32_to_cpu(dpio_rsp->qbman_version); + attr->clk = le32_to_cpu(dpio_rsp->clk); return 0; } diff --git a/drivers/soc/fsl/dpio/dpio.h b/drivers/soc/fsl/dpio/dpio.h index da06f7258098..7fda44f0d7f4 100644 --- a/drivers/soc/fsl/dpio/dpio.h +++ b/drivers/soc/fsl/dpio/dpio.h @@ -59,6 +59,7 @@ int dpio_disable(struct fsl_mc_io *mc_io, * @num_priorities: Number of priorities for the notification channel (1-8); * relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL' * @qbman_version: QBMAN version + * @clk: QBMAN clock frequency value in Hz */ struct dpio_attr { int id; @@ -68,6 +69,7 @@ struct dpio_attr { enum dpio_channel_mode channel_mode; u8 num_priorities; u32 qbman_version; + u32 clk; }; int dpio_get_attributes(struct fsl_mc_io *mc_io, diff --git a/drivers/soc/fsl/dpio/qbman-portal.h b/drivers/soc/fsl/dpio/qbman-portal.h index c7c2225b7d91..f058289416af 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.h +++ b/drivers/soc/fsl/dpio/qbman-portal.h @@ -24,6 +24,7 @@ struct qbman_swp_desc { void *cena_bar; /* Cache-enabled portal base address */ void __iomem *cinh_bar; /* Cache-inhibited portal base address */ u32 qman_version; + u32 qman_clk; }; #define QBMAN_SWP_INTERRUPT_EQRI 0x01 diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h index c9d849924f89..041ebf7d804c 100644 --- a/include/soc/fsl/dpaa2-io.h +++ b/include/soc/fsl/dpaa2-io.h @@ -44,6 +44,7 @@ struct device; * @regs_cinh: The cache inhibited regs * @dpio_id: The dpio index * @qman_version: The qman version + * @qman_clk: The qman clock frequency in Hz * * Describes the attributes and features of the DPIO object. */ @@ -55,6 +56,7 @@ struct dpaa2_io_desc { void __iomem *regs_cinh; int dpio_id; u32 qman_version; + u32 qman_clk; }; struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, From ed1d2143fee53755ec601eb4d48a337a93933f71 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 Oct 2021 12:01:24 +0300 Subject: [PATCH 223/440] soc: fsl: dpio: add support for irq coalescing per software portal In DPAA2 based SoCs, the IRQ coalesing support per software portal has 2 configurable parameters: - the IRQ timeout period (QBMAN_CINH_SWP_ITPR): how many 256 QBMAN cycles need to pass until a dequeue interrupt is asserted. - the IRQ threshold (QBMAN_CINH_SWP_DQRR_ITR): how many dequeue responses in the DQRR ring would generate an IRQ. Add support for setting up and querying these IRQ coalescing related parameters. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/soc/fsl/dpio/dpio-service.c | 37 ++++++++++++++++++ drivers/soc/fsl/dpio/qbman-portal.c | 59 +++++++++++++++++++++++++++++ drivers/soc/fsl/dpio/qbman-portal.h | 11 ++++++ include/soc/fsl/dpaa2-io.h | 4 ++ 4 files changed, 111 insertions(+) diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 2acbb96c5e45..44fafed045ca 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -114,6 +114,7 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, struct device *dev) { struct dpaa2_io *obj = kmalloc(sizeof(*obj), GFP_KERNEL); + u32 qman_256_cycles_per_ns; if (!obj) return NULL; @@ -129,6 +130,13 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, obj->swp_desc.cinh_bar = obj->dpio_desc.regs_cinh; obj->swp_desc.qman_clk = obj->dpio_desc.qman_clk; obj->swp_desc.qman_version = obj->dpio_desc.qman_version; + + /* Compute how many 256 QBMAN cycles fit into one ns. This is because + * the interrupt timeout period register needs to be specified in QBMAN + * clock cycles in increments of 256. + */ + qman_256_cycles_per_ns = 256000 / (obj->swp_desc.qman_clk / 1000000); + obj->swp_desc.qman_256_cycles_per_ns = qman_256_cycles_per_ns; obj->swp = qbman_swp_init(&obj->swp_desc); if (!obj->swp) { @@ -780,3 +788,32 @@ int dpaa2_io_query_bp_count(struct dpaa2_io *d, u16 bpid, u32 *num) return 0; } EXPORT_SYMBOL_GPL(dpaa2_io_query_bp_count); + +/** + * dpaa2_io_set_irq_coalescing() - Set new IRQ coalescing values + * @d: the given DPIO object + * @irq_holdoff: interrupt holdoff (timeout) period in us + * + * Return 0 for success, or negative error code on error. + */ +int dpaa2_io_set_irq_coalescing(struct dpaa2_io *d, u32 irq_holdoff) +{ + struct qbman_swp *swp = d->swp; + + return qbman_swp_set_irq_coalescing(swp, swp->dqrr.dqrr_size - 1, + irq_holdoff); +} +EXPORT_SYMBOL(dpaa2_io_set_irq_coalescing); + +/** + * dpaa2_io_get_irq_coalescing() - Get the current IRQ coalescing parameters + * @d: the given DPIO object + * @irq_holdoff: interrupt holdoff (timeout) period in us + */ +void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff) +{ + struct qbman_swp *swp = d->swp; + + qbman_swp_get_irq_coalescing(swp, NULL, irq_holdoff); +} +EXPORT_SYMBOL(dpaa2_io_get_irq_coalescing); diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index f13da4d7d1c5..d3c58df6240d 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -29,6 +29,7 @@ #define QBMAN_CINH_SWP_EQCR_AM_RT 0x980 #define QBMAN_CINH_SWP_RCR_AM_RT 0x9c0 #define QBMAN_CINH_SWP_DQPI 0xa00 +#define QBMAN_CINH_SWP_DQRR_ITR 0xa80 #define QBMAN_CINH_SWP_DCAP 0xac0 #define QBMAN_CINH_SWP_SDQCR 0xb00 #define QBMAN_CINH_SWP_EQCR_AM_RT2 0xb40 @@ -38,6 +39,7 @@ #define QBMAN_CINH_SWP_IER 0xe40 #define QBMAN_CINH_SWP_ISDR 0xe80 #define QBMAN_CINH_SWP_IIR 0xec0 +#define QBMAN_CINH_SWP_ITPR 0xf40 /* CENA register offsets */ #define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((u32)(n) << 6)) @@ -355,6 +357,9 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) & p->eqcr.pi_ci_mask; p->eqcr.available = p->eqcr.pi_ring_size; + /* Initialize the software portal with a irq timeout period of 0us */ + qbman_swp_set_irq_coalescing(p, p->dqrr.dqrr_size - 1, 0); + return p; } @@ -1796,3 +1801,57 @@ u32 qbman_bp_info_num_free_bufs(struct qbman_bp_query_rslt *a) { return le32_to_cpu(a->fill); } + +/** + * qbman_swp_set_irq_coalescing() - Set new IRQ coalescing values + * @p: the software portal object + * @irq_threshold: interrupt threshold + * @irq_holdoff: interrupt holdoff (timeout) period in us + * + * Return 0 for success, or negative error code on error. + */ +int qbman_swp_set_irq_coalescing(struct qbman_swp *p, u32 irq_threshold, + u32 irq_holdoff) +{ + u32 itp, max_holdoff; + + /* Convert irq_holdoff value from usecs to 256 QBMAN clock cycles + * increments. This depends to the QBMAN internal frequency. + */ + itp = (irq_holdoff * 1000) / p->desc->qman_256_cycles_per_ns; + if (itp < 0 || itp > 4096) { + max_holdoff = (p->desc->qman_256_cycles_per_ns * 4096) / 1000; + pr_err("irq_holdoff must be between 0..%dus\n", max_holdoff); + return -EINVAL; + } + + if (irq_threshold >= p->dqrr.dqrr_size || irq_threshold < 0) { + pr_err("irq_threshold must be between 0..%d\n", + p->dqrr.dqrr_size - 1); + return -EINVAL; + } + + p->irq_threshold = irq_threshold; + p->irq_holdoff = irq_holdoff; + + qbman_write_register(p, QBMAN_CINH_SWP_DQRR_ITR, irq_threshold); + qbman_write_register(p, QBMAN_CINH_SWP_ITPR, itp); + + return 0; +} + +/** + * qbman_swp_get_irq_coalescing() - Get the current IRQ coalescing parameters + * @p: the software portal object + * @irq_threshold: interrupt threshold (an IRQ is generated when there are more + * DQRR entries in the portal than the threshold) + * @irq_holdoff: interrupt holdoff (timeout) period in us + */ +void qbman_swp_get_irq_coalescing(struct qbman_swp *p, u32 *irq_threshold, + u32 *irq_holdoff) +{ + if (irq_threshold) + *irq_threshold = p->irq_threshold; + if (irq_holdoff) + *irq_holdoff = p->irq_holdoff; +} diff --git a/drivers/soc/fsl/dpio/qbman-portal.h b/drivers/soc/fsl/dpio/qbman-portal.h index f058289416af..4ea2dd950a2a 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.h +++ b/drivers/soc/fsl/dpio/qbman-portal.h @@ -25,6 +25,7 @@ struct qbman_swp_desc { void __iomem *cinh_bar; /* Cache-inhibited portal base address */ u32 qman_version; u32 qman_clk; + u32 qman_256_cycles_per_ns; }; #define QBMAN_SWP_INTERRUPT_EQRI 0x01 @@ -157,6 +158,10 @@ struct qbman_swp { } eqcr; spinlock_t access_spinlock; + + /* Interrupt coalescing */ + u32 irq_threshold; + u32 irq_holdoff; }; /* Function pointers */ @@ -649,4 +654,10 @@ static inline const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s) return qbman_swp_dqrr_next_ptr(s); } +int qbman_swp_set_irq_coalescing(struct qbman_swp *p, u32 irq_threshold, + u32 irq_holdoff); + +void qbman_swp_get_irq_coalescing(struct qbman_swp *p, u32 *irq_threshold, + u32 *irq_holdoff); + #endif /* __FSL_QBMAN_PORTAL_H */ diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h index 041ebf7d804c..9bff280fe8f4 100644 --- a/include/soc/fsl/dpaa2-io.h +++ b/include/soc/fsl/dpaa2-io.h @@ -131,4 +131,8 @@ int dpaa2_io_query_fq_count(struct dpaa2_io *d, u32 fqid, u32 *fcnt, u32 *bcnt); int dpaa2_io_query_bp_count(struct dpaa2_io *d, u16 bpid, u32 *num); + +int dpaa2_io_set_irq_coalescing(struct dpaa2_io *d, u32 irq_holdoff); +void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff); + #endif /* __FSL_DPAA2_IO_H */ From a64b442137669c9e839c6a70965989b01b1253b7 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 Oct 2021 12:01:25 +0300 Subject: [PATCH 224/440] net: dpaa2: add support for manual setup of IRQ coalesing Use the newly exported dpio driver API to manually configure the IRQ coalescing parameters requested by the user. The .get_coalesce() and .set_coalesce() net_device callbacks are implemented and directly export or setup the rx-usecs on all the channels configured. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- .../ethernet/freescale/dpaa2/dpaa2-ethtool.c | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 2da5f881f630..69a6860e11fa 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -820,7 +820,56 @@ static int dpaa2_eth_set_tunable(struct net_device *net_dev, return err; } +static int dpaa2_eth_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ic, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct dpaa2_eth_priv *priv = netdev_priv(dev); + struct dpaa2_io *dpio = priv->channel[0]->dpio; + + dpaa2_io_get_irq_coalescing(dpio, &ic->rx_coalesce_usecs); + + return 0; +} + +static int dpaa2_eth_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ic, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct dpaa2_eth_priv *priv = netdev_priv(dev); + struct dpaa2_io *dpio; + u32 prev_rx_usecs; + int i, j, err; + + /* Keep track of the previous value, just in case we fail */ + dpio = priv->channel[0]->dpio; + dpaa2_io_get_irq_coalescing(dpio, &prev_rx_usecs); + + /* Setup new value for rx coalescing */ + for (i = 0; i < priv->num_channels; i++) { + dpio = priv->channel[i]->dpio; + + err = dpaa2_io_set_irq_coalescing(dpio, ic->rx_coalesce_usecs); + if (err) + goto restore_rx_usecs; + } + + return 0; + +restore_rx_usecs: + for (j = 0; j < i; j++) { + dpio = priv->channel[j]->dpio; + + dpaa2_io_set_irq_coalescing(dpio, prev_rx_usecs); + } + + return err; +} + const struct ethtool_ops dpaa2_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = dpaa2_eth_get_drvinfo, .nway_reset = dpaa2_eth_nway_reset, .get_link = ethtool_op_get_link, @@ -836,4 +885,6 @@ const struct ethtool_ops dpaa2_ethtool_ops = { .get_ts_info = dpaa2_eth_get_ts_info, .get_tunable = dpaa2_eth_get_tunable, .set_tunable = dpaa2_eth_set_tunable, + .get_coalesce = dpaa2_eth_get_coalesce, + .set_coalesce = dpaa2_eth_set_coalesce, }; From 69651bd8d303e0b4d160569de37d04512acd6b2f Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 Oct 2021 12:01:26 +0300 Subject: [PATCH 225/440] soc: fsl: dpio: add Net DIM integration Use the generic dynamic interrupt moderation (dim) framework to implement adaptive interrupt coalescing on Rx. With the per-packet interrupt scheme, a high interrupt rate has been noted for moderate traffic flows leading to high CPU utilization. The dpio driver exports new functions to enable/disable adaptive IRQ coalescing on a DPIO object, to query the state or to update Net DIM with a new set of bytes and frames dequeued. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/soc/fsl/Kconfig | 1 + drivers/soc/fsl/dpio/dpio-service.c | 79 +++++++++++++++++++++++++++++ drivers/soc/fsl/dpio/qbman-portal.h | 1 + include/soc/fsl/dpaa2-io.h | 5 +- 4 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig index 4df32bc4c7a6..07d52cafbb31 100644 --- a/drivers/soc/fsl/Kconfig +++ b/drivers/soc/fsl/Kconfig @@ -24,6 +24,7 @@ config FSL_MC_DPIO tristate "QorIQ DPAA2 DPIO driver" depends on FSL_MC_BUS select SOC_BUS + select DIMLIB help Driver for the DPAA2 DPIO object. A DPIO provides queue and buffer management facilities for software to interact with diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 44fafed045ca..3fd0d0840287 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "dpio.h" @@ -28,6 +29,14 @@ struct dpaa2_io { spinlock_t lock_notifications; struct list_head notifications; struct device *dev; + + /* Net DIM */ + struct dim rx_dim; + /* protect against concurrent Net DIM updates */ + spinlock_t dim_lock; + u16 event_ctr; + u64 bytes; + u64 frames; }; struct dpaa2_io_store { @@ -100,6 +109,17 @@ struct dpaa2_io *dpaa2_io_service_select(int cpu) } EXPORT_SYMBOL_GPL(dpaa2_io_service_select); +static void dpaa2_io_dim_work(struct work_struct *w) +{ + struct dim *dim = container_of(w, struct dim, work); + struct dim_cq_moder moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + struct dpaa2_io *d = container_of(dim, struct dpaa2_io, rx_dim); + + dpaa2_io_set_irq_coalescing(d, moder.usec); + dim->state = DIM_START_MEASURE; +} + /** * dpaa2_io_create() - create a dpaa2_io object. * @desc: the dpaa2_io descriptor @@ -147,6 +167,7 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, INIT_LIST_HEAD(&obj->node); spin_lock_init(&obj->lock_mgmt_cmd); spin_lock_init(&obj->lock_notifications); + spin_lock_init(&obj->dim_lock); INIT_LIST_HEAD(&obj->notifications); /* For now only enable DQRR interrupts */ @@ -164,6 +185,12 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc, obj->dev = dev; + memset(&obj->rx_dim, 0, sizeof(obj->rx_dim)); + INIT_WORK(&obj->rx_dim.work, dpaa2_io_dim_work); + obj->event_ctr = 0; + obj->bytes = 0; + obj->frames = 0; + return obj; } @@ -203,6 +230,8 @@ irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj) struct qbman_swp *swp; u32 status; + obj->event_ctr++; + swp = obj->swp; status = qbman_swp_interrupt_read_status(swp); if (!status) @@ -817,3 +846,53 @@ void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff) qbman_swp_get_irq_coalescing(swp, NULL, irq_holdoff); } EXPORT_SYMBOL(dpaa2_io_get_irq_coalescing); + +/** + * dpaa2_io_set_adaptive_coalescing() - Enable/disable adaptive coalescing + * @d: the given DPIO object + * @use_adaptive_rx_coalesce: adaptive coalescing state + */ +void dpaa2_io_set_adaptive_coalescing(struct dpaa2_io *d, + int use_adaptive_rx_coalesce) +{ + d->swp->use_adaptive_rx_coalesce = use_adaptive_rx_coalesce; +} +EXPORT_SYMBOL(dpaa2_io_set_adaptive_coalescing); + +/** + * dpaa2_io_get_adaptive_coalescing() - Query adaptive coalescing state + * @d: the given DPIO object + * + * Return 1 when adaptive coalescing is enabled on the DPIO object and 0 + * otherwise. + */ +int dpaa2_io_get_adaptive_coalescing(struct dpaa2_io *d) +{ + return d->swp->use_adaptive_rx_coalesce; +} +EXPORT_SYMBOL(dpaa2_io_get_adaptive_coalescing); + +/** + * dpaa2_io_update_net_dim() - Update Net DIM + * @d: the given DPIO object + * @frames: how many frames have been dequeued by the user since the last call + * @bytes: how many bytes have been dequeued by the user since the last call + */ +void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes) +{ + struct dim_sample dim_sample = {}; + + if (!d->swp->use_adaptive_rx_coalesce) + return; + + spin_lock(&d->dim_lock); + + d->bytes += bytes; + d->frames += frames; + + dim_update_sample(d->event_ctr, d->frames, d->bytes, &dim_sample); + net_dim(&d->rx_dim, dim_sample); + + spin_unlock(&d->dim_lock); +} +EXPORT_SYMBOL(dpaa2_io_update_net_dim); diff --git a/drivers/soc/fsl/dpio/qbman-portal.h b/drivers/soc/fsl/dpio/qbman-portal.h index 4ea2dd950a2a..b23883dd2725 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.h +++ b/drivers/soc/fsl/dpio/qbman-portal.h @@ -162,6 +162,7 @@ struct qbman_swp { /* Interrupt coalescing */ u32 irq_threshold; u32 irq_holdoff; + int use_adaptive_rx_coalesce; }; /* Function pointers */ diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h index 9bff280fe8f4..4bf62de2e00e 100644 --- a/include/soc/fsl/dpaa2-io.h +++ b/include/soc/fsl/dpaa2-io.h @@ -134,5 +134,8 @@ int dpaa2_io_query_bp_count(struct dpaa2_io *d, u16 bpid, int dpaa2_io_set_irq_coalescing(struct dpaa2_io *d, u32 irq_holdoff); void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff); - +void dpaa2_io_set_adaptive_coalescing(struct dpaa2_io *d, + int use_adaptive_rx_coalesce); +int dpaa2_io_get_adaptive_coalescing(struct dpaa2_io *d); +void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes); #endif /* __FSL_DPAA2_IO_H */ From fc398bec03879a7469f0c8e16567698b7d5d814c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 Oct 2021 12:01:27 +0300 Subject: [PATCH 226/440] net: dpaa2: add adaptive interrupt coalescing Add support for adaptive interrupt coalescing to the dpaa2-eth driver. First of all, ETHTOOL_COALESCE_USE_ADAPTIVE_RX is defined as a supported coalesce parameter and the requested state is configured through the dpio APIs added in the previous patch. Besides the ethtool API interaction, we keep track of how many bytes and frames are dequeued per CDAN (Channel Data Availability Notification) and update the Net DIM instance through the dpaa2_io_update_net_dim() API. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 11 ++++++++++- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 ++ drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 9 ++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 34f189271ef2..714e961e7a77 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -533,6 +533,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, percpu_stats->rx_packets++; percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); + ch->stats.bytes_per_cdan += dpaa2_fd_get_len(fd); list_add_tail(&skb->list, ch->rx_list); @@ -641,6 +642,7 @@ static int dpaa2_eth_consume_frames(struct dpaa2_eth_channel *ch, fq->stats.frames += cleaned; ch->stats.frames += cleaned; + ch->stats.frames_per_cdan += cleaned; /* A dequeue operation only pulls frames from a single queue * into the store. Return the frame queue as an out param. @@ -1264,7 +1266,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) /* Tx confirmation frame processing routine */ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch __always_unused, + struct dpaa2_eth_channel *ch, const struct dpaa2_fd *fd, struct dpaa2_eth_fq *fq) { @@ -1279,6 +1281,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, percpu_extras = this_cpu_ptr(priv->percpu_extras); percpu_extras->tx_conf_frames++; percpu_extras->tx_conf_bytes += fd_len; + ch->stats.bytes_per_cdan += fd_len; /* Check frame errors in the FD field */ fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK; @@ -1601,6 +1604,12 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) } } while (store_cleaned); + /* Update NET DIM with the values for this CDAN */ + dpaa2_io_update_net_dim(ch->dpio, ch->stats.frames_per_cdan, + ch->stats.bytes_per_cdan); + ch->stats.frames_per_cdan = 0; + ch->stats.bytes_per_cdan = 0; + /* We didn't consume the entire budget, so finish napi and * re-enable data availability notifications */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 628d2d45f045..2085844227fe 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -384,6 +384,8 @@ struct dpaa2_eth_ch_stats { __u64 xdp_redirect; /* Must be last, does not show up in ethtool stats */ __u64 frames; + __u64 frames_per_cdan; + __u64 bytes_per_cdan; }; /* Maximum number of queues associated with a DPNI */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 69a6860e11fa..adb8ce5306ee 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -829,6 +829,7 @@ static int dpaa2_eth_get_coalesce(struct net_device *dev, struct dpaa2_io *dpio = priv->channel[0]->dpio; dpaa2_io_get_irq_coalescing(dpio, &ic->rx_coalesce_usecs); + ic->use_adaptive_rx_coalesce = dpaa2_io_get_adaptive_coalescing(dpio); return 0; } @@ -840,17 +841,21 @@ static int dpaa2_eth_set_coalesce(struct net_device *dev, { struct dpaa2_eth_priv *priv = netdev_priv(dev); struct dpaa2_io *dpio; + int prev_adaptive; u32 prev_rx_usecs; int i, j, err; /* Keep track of the previous value, just in case we fail */ dpio = priv->channel[0]->dpio; dpaa2_io_get_irq_coalescing(dpio, &prev_rx_usecs); + prev_adaptive = dpaa2_io_get_adaptive_coalescing(dpio); /* Setup new value for rx coalescing */ for (i = 0; i < priv->num_channels; i++) { dpio = priv->channel[i]->dpio; + dpaa2_io_set_adaptive_coalescing(dpio, + ic->use_adaptive_rx_coalesce); err = dpaa2_io_set_irq_coalescing(dpio, ic->rx_coalesce_usecs); if (err) goto restore_rx_usecs; @@ -863,13 +868,15 @@ restore_rx_usecs: dpio = priv->channel[j]->dpio; dpaa2_io_set_irq_coalescing(dpio, prev_rx_usecs); + dpaa2_io_set_adaptive_coalescing(dpio, prev_adaptive); } return err; } const struct ethtool_ops dpaa2_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = dpaa2_eth_get_drvinfo, .nway_reset = dpaa2_eth_nway_reset, .get_link = ethtool_op_get_link, From e93d1c37a85b7276506c99ec964a4094d9c1bcf6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 13:59:56 +0200 Subject: [PATCH 227/440] ice: remove ring_active from ice_ring This field is dead and driver is not making any use of it. Simply remove it. Signed-off-by: Maciej Fijalkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 -- drivers/net/ethernet/intel/ice/ice_main.c | 1 - drivers/net/ethernet/intel/ice/ice_txrx.h | 2 -- 3 files changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c8a50898bbc1..472c4525e984 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1338,7 +1338,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->q_index = i; ring->reg_idx = vsi->txq_map[i]; - ring->ring_active = false; ring->vsi = vsi; ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; @@ -1357,7 +1356,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->q_index = i; ring->reg_idx = vsi->rxq_map[i]; - ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = dev; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ceb0912e5850..da91d9f9128c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2385,7 +2385,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; - xdp_ring->ring_active = false; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; xdp_ring->dev = dev; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cce348c83da8..24e123cd6554 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -269,8 +269,6 @@ struct ice_ring { u16 q_index; /* Queue number of ring */ u16 q_handle; /* Queue handle per TC */ - u8 ring_active:1; /* is ring online or not */ - u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ From dc23715cf30a9acb808f5b08962877c390d3e6ea Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 13:59:57 +0200 Subject: [PATCH 228/440] ice: move ice_container_type onto ice_ring_container Currently ice_container_type is scoped only for ice_ethtool.c. Next commit that will split the ice_ring struct onto Rx/Tx specific ring structs is going to also modify the type of linked list of rings that is within ice_ring_container. Therefore, the functions that are taking the ice_ring_container as an input argument will need to be aware of a ring type that will be looked up. Embed ice_container_type within ice_ring_container and initialize it properly when allocating the q_vectors. Signed-off-by: Maciej Fijalkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 2 ++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 38 ++++++++------------ drivers/net/ethernet/intel/ice/ice_txrx.h | 6 ++++ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index d7a5ac9346bc..b932759d6347 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -115,6 +115,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; q_vector->tx.itr_mode = ITR_DYNAMIC; q_vector->rx.itr_mode = ITR_DYNAMIC; + q_vector->tx.type = ICE_TX_CONTAINER; + q_vector->rx.type = ICE_RX_CONTAINER; if (vsi->type == ICE_VSI_VF) goto out; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 201979cc47fb..ecaf5c05eaf6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3501,15 +3501,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } -enum ice_container_type { - ICE_RX_CONTAINER, - ICE_TX_CONTAINER, -}; - /** * ice_get_rc_coalesce - get ITR values for specific ring container * @ec: ethtool structure to fill with driver's coalesce settings - * @c_type: container type, Rx or Tx * @rc: ring container that the ITR values will come from * * Query the device for ice_ring_container specific ITR values. This is @@ -3519,13 +3513,12 @@ enum ice_container_type { * Returns 0 on success, negative otherwise. */ static int -ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, - struct ice_ring_container *rc) +ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc) { if (!rc->ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); ec->rx_coalesce_usecs = rc->itr_setting; @@ -3536,7 +3529,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting; break; default: - dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", rc->type); return -EINVAL; } @@ -3557,18 +3550,18 @@ static int ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else { @@ -3620,7 +3613,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, /** * ice_set_rc_coalesce - set ITR values for specific ring container - * @c_type: container type, Rx or Tx * @ec: ethtool structure from user to update ITR settings * @rc: ring container that the ITR values will come from * @vsi: VSI associated to the ring container @@ -3632,10 +3624,10 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, * Returns 0 on success, negative otherwise. */ static int -ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, +ice_set_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc, struct ice_vsi *vsi) { - const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx"; + const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? "rx" : "tx"; u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; @@ -3643,7 +3635,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (!rc->ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && @@ -3676,7 +3668,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", - c_type); + rc->type); return -EINVAL; } @@ -3725,22 +3717,22 @@ static int ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 24e123cd6554..bf40608f9bac 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -330,6 +330,11 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring) return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); } +enum ice_container_type { + ICE_RX_CONTAINER, + ICE_TX_CONTAINER, +}; + struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; @@ -341,6 +346,7 @@ struct ice_ring_container { u16 itr_setting:13; u16 itr_reserved:2; u16 itr_mode:1; + enum ice_container_type type; }; struct ice_coalesce_stored { From e72bba21355dbb67512a0d666fec9f4b56dbfc2f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 13:59:58 +0200 Subject: [PATCH 229/440] ice: split ice_ring onto Tx/Rx separate structs While it was convenient to have a generic ring structure that served both Tx and Rx sides, next commits are going to introduce several Tx-specific fields, so in order to avoid hurting the Rx side, let's pull out the Tx ring onto new ice_tx_ring and ice_rx_ring structs. Rx ring could be handled by the old ice_ring which would reduce the code churn within this patch, but this would make things asymmetric. Make the union out of the ring container within ice_q_vector so that it is possible to iterate over newly introduced ice_tx_ring. Remove the @size as it's only accessed from control path and it can be calculated pretty easily. Change definitions of ice_update_ring_stats and ice_fetch_u64_stats_per_ring so that they are ring agnostic and can be used for both Rx and Tx rings. Sizes of Rx and Tx ring structs are 256 and 192 bytes, respectively. In Rx ring xdp_rxq_info occupies its own cacheline, so it's the major difference now. Signed-off-by: Maciej Fijalkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 33 +++-- drivers/net/ethernet/intel/ice/ice_base.c | 57 ++++---- drivers/net/ethernet/intel/ice/ice_base.h | 8 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 5 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.h | 10 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 35 ++--- drivers/net/ethernet/intel/ice/ice_eswitch.h | 4 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 49 ++++--- drivers/net/ethernet/intel/ice/ice_lib.c | 64 +++++---- drivers/net/ethernet/intel/ice/ice_lib.h | 6 +- drivers/net/ethernet/intel/ice/ice_main.c | 69 +++++---- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- drivers/net/ethernet/intel/ice/ice_ptp.h | 4 +- drivers/net/ethernet/intel/ice/ice_trace.h | 28 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 131 ++++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.h | 121 ++++++++++------ drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 18 +-- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 14 +- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 49 ++++--- drivers/net/ethernet/intel/ice/ice_xsk.h | 20 +-- 21 files changed, 402 insertions(+), 327 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 30cc748337dd..35cf1865beb4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -275,8 +275,8 @@ struct ice_vsi { struct ice_sw *vsw; /* switch this VSI is on */ struct ice_pf *back; /* back pointer to PF */ struct ice_port_info *port_info; /* back pointer to port_info */ - struct ice_ring **rx_rings; /* Rx ring array */ - struct ice_ring **tx_rings; /* Tx ring array */ + struct ice_rx_ring **rx_rings; /* Rx ring array */ + struct ice_tx_ring **tx_rings; /* Tx ring array */ struct ice_q_vector **q_vectors; /* q_vector array */ irqreturn_t (*irq_handler)(int irq, void *data); @@ -349,7 +349,7 @@ struct ice_vsi { u16 qset_handle[ICE_MAX_TRAFFIC_CLASS]; struct ice_tc_cfg tc_cfg; struct bpf_prog *xdp_prog; - struct ice_ring **xdp_rings; /* XDP ring array */ + struct ice_tx_ring **xdp_rings; /* XDP ring array */ unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -580,25 +580,42 @@ static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) return !!vsi->xdp_prog; } -static inline void ice_set_ring_xdp(struct ice_ring *ring) +static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) { ring->flags |= ICE_TX_FLAGS_RING_XDP; } /** * ice_xsk_pool - get XSK buffer pool bound to a ring - * @ring: ring to use + * @ring: Rx ring to use * * Returns a pointer to xdp_umem structure if there is a buffer pool present, * NULL otherwise. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring) +static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - if (ice_ring_is_xdp(ring)) - qid -= vsi->num_xdp_txq; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(vsi->netdev, qid); +} + +/** + * ice_tx_xsk_pool - get XSK buffer pool bound to a ring + * @ring: Tx ring to use + * + * Returns a pointer to xdp_umem structure if there is a buffer pool present, + * NULL otherwise. Tx equivalent of ice_xsk_pool. + */ +static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + u16 qid; + + qid = ring->q_index - vsi->num_xdp_txq; if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b932759d6347..be625977addf 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -148,7 +148,8 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) { struct ice_q_vector *q_vector; struct ice_pf *pf = vsi->back; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; struct device *dev; dev = ice_pf_to_dev(pf); @@ -158,10 +159,10 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) } q_vector = vsi->q_vectors[v_idx]; - ice_for_each_ring(ring, q_vector->tx) - ring->q_vector = NULL; - ice_for_each_ring(ring, q_vector->rx) - ring->q_vector = NULL; + ice_for_each_tx_ring(tx_ring, q_vector->tx) + tx_ring->q_vector = NULL; + ice_for_each_rx_ring(rx_ring, q_vector->rx) + rx_ring->q_vector = NULL; /* only VSI with an associated netdev is set up with NAPI */ if (vsi->netdev) @@ -203,12 +204,12 @@ static void ice_cfg_itr_gran(struct ice_hw *hw) } /** - * ice_calc_q_handle - calculate the queue handle + * ice_calc_txq_handle - calculate the queue handle * @vsi: VSI that ring belongs to * @ring: ring to get the absolute queue index * @tc: traffic class number */ -static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) +static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc) { WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n"); @@ -220,7 +221,7 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) } /** - * ice_eswitch_calc_q_handle + * ice_eswitch_calc_txq_handle * @ring: pointer to ring which unique index is needed * * To correctly work with many netdevs ring->q_index of Tx rings on switchdev @@ -230,7 +231,7 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) * Return ICE_INVAL_Q_INDEX when index wasn't found. Should never happen, * because VSI is get from ring->vsi, so it has to be present in this VSI. */ -static u16 ice_eswitch_calc_q_handle(struct ice_ring *ring) +static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring) { struct ice_vsi *vsi = ring->vsi; int i; @@ -250,7 +251,7 @@ static u16 ice_eswitch_calc_q_handle(struct ice_ring *ring) * This enables/disables XPS for a given Tx descriptor ring * based on the TCs enabled for the VSI that ring belongs to. */ -static void ice_cfg_xps_tx_ring(struct ice_ring *ring) +static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring) { if (!ring->q_vector || !ring->netdev) return; @@ -272,7 +273,7 @@ static void ice_cfg_xps_tx_ring(struct ice_ring *ring) * Configure the Tx descriptor ring in TLAN context. */ static void -ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) { struct ice_vsi *vsi = ring->vsi; struct ice_hw *hw = &vsi->back->hw; @@ -284,7 +285,7 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) /* Transmit Queue Length */ tlan_ctx->qlen = ring->count; - ice_set_cgd_num(tlan_ctx, ring); + ice_set_cgd_num(tlan_ctx, ring->dcb_tc); /* PF number */ tlan_ctx->pf_num = hw->pf_id; @@ -341,7 +342,7 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) * * Returns the offset value for ring into the data buffer. */ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) +static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring) { if (ice_ring_uses_build_skb(rx_ring)) return ICE_SKB_PAD; @@ -357,7 +358,7 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring) * * Configure the Rx descriptor ring in RLAN context. */ -static int ice_setup_rx_ctx(struct ice_ring *ring) +static int ice_setup_rx_ctx(struct ice_rx_ring *ring) { int chain_len = ICE_MAX_CHAINED_RX_BUFS; struct ice_vsi *vsi = ring->vsi; @@ -468,7 +469,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring) * * Return 0 on success and a negative value on error. */ -int ice_vsi_cfg_rxq(struct ice_ring *ring) +int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); u16 num_bufs = ICE_DESC_UNUSED(ring); @@ -689,16 +690,16 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); q_vector->num_ring_tx = tx_rings_per_v; - q_vector->tx.ring = NULL; + q_vector->tx.tx_ring = NULL; q_vector->tx.itr_idx = ICE_TX_ITR; q_base = vsi->num_txq - tx_rings_rem; for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; + tx_ring->next = q_vector->tx.tx_ring; + q_vector->tx.tx_ring = tx_ring; } tx_rings_rem -= tx_rings_per_v; @@ -706,16 +707,16 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); q_vector->num_ring_rx = rx_rings_per_v; - q_vector->rx.ring = NULL; + q_vector->rx.rx_ring = NULL; q_vector->rx.itr_idx = ICE_RX_ITR; q_base = vsi->num_rxq - rx_rings_rem; for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; + rx_ring->next = q_vector->rx.rx_ring; + q_vector->rx.rx_ring = rx_ring; } rx_rings_rem -= rx_rings_per_v; } @@ -740,7 +741,7 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi) * @qg_buf: queue group buffer */ int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_aqc_add_tx_qgrp *qg_buf) { u8 buf_len = struct_size(qg_buf, txqs, 1); @@ -776,12 +777,12 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, * TC into the VSI Tx ring */ if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ring->q_handle = ice_eswitch_calc_q_handle(ring); + ring->q_handle = ice_eswitch_calc_txq_handle(ring); if (ring->q_handle == ICE_INVAL_Q_INDEX) return -ENODEV; } else { - ring->q_handle = ice_calc_q_handle(vsi, ring, tc); + ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); } status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, @@ -906,7 +907,7 @@ void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) */ int ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, + u16 rel_vmvf_num, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { struct ice_pf *pf = vsi->back; @@ -963,7 +964,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * are needed for stopping Tx queue */ void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { u8 tc; diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h index 20e1c29aa68a..b67dca417acb 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.h +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -6,7 +6,7 @@ #include "ice.h" -int ice_vsi_cfg_rxq(struct ice_ring *ring); +int ice_vsi_cfg_rxq(struct ice_rx_ring *ring); int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg); int ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait); @@ -15,7 +15,7 @@ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi); void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); void ice_vsi_free_q_vectors(struct ice_vsi *vsi); int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_aqc_add_tx_qgrp *qg_buf); void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector); void @@ -25,9 +25,9 @@ ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); int ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, + u16 rel_vmvf_num, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); #endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 73714685fb68..4366626e0eb4 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -194,7 +194,8 @@ u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index) */ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { - struct ice_ring *tx_ring, *rx_ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; u16 qoffset, qcount; int i, n; @@ -824,7 +825,7 @@ void ice_update_dcb_stats(struct ice_pf *pf) * tag will already be configured with the correct ID and priority bits */ void -ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) { struct sk_buff *skb = first->skb; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 3dcde1750a5e..6700e97b3b51 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -29,7 +29,7 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); void -ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first); void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, @@ -50,9 +50,9 @@ static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q) } static inline void -ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) +ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { - tlan_ctx->cgd_num = ring->dcb_tc; + tlan_ctx->cgd_num = dcb_tc; } static inline bool ice_is_dcb_active(struct ice_pf *pf) @@ -102,7 +102,7 @@ ice_pf_dcb_cfg(struct ice_pf __always_unused *pf, } static inline int -ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring __always_unused *tx_ring, struct ice_tx_buf __always_unused *first) { return 0; @@ -131,6 +131,6 @@ static inline void ice_update_dcb_stats(struct ice_pf *pf) { } static inline void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { } -static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { } +static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { } #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index d91a7834f91f..a20c38446e76 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -68,21 +68,6 @@ err_def_rx: return -ENODEV; } -/** - * ice_eswitch_remap_ring - reconfigure ring of switchdev ctrl VSI - * @ring: pointer to ring - * @q_vector: pointer of q_vector which is connected with this ring - * @netdev: netdevice connected with this ring - */ -static void -ice_eswitch_remap_ring(struct ice_ring *ring, struct ice_q_vector *q_vector, - struct net_device *netdev) -{ - ring->q_vector = q_vector; - ring->next = NULL; - ring->netdev = netdev; -} - /** * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI * @pf: pointer to PF struct @@ -102,23 +87,27 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) ice_for_each_txq(vsi, q_id) { struct ice_repr *repr = pf->vf[q_id].repr; struct ice_q_vector *q_vector = repr->q_vector; - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; q_vector->vsi = vsi; q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; q_vector->num_ring_tx = 1; - q_vector->tx.ring = tx_ring; - ice_eswitch_remap_ring(tx_ring, q_vector, repr->netdev); + q_vector->tx.tx_ring = tx_ring; + tx_ring->q_vector = q_vector; + tx_ring->next = NULL; + tx_ring->netdev = repr->netdev; /* In switchdev mode, from OS stack perspective, there is only * one queue for given netdev, so it needs to be indexed as 0. */ tx_ring->q_index = 0; q_vector->num_ring_rx = 1; - q_vector->rx.ring = rx_ring; - ice_eswitch_remap_ring(rx_ring, q_vector, repr->netdev); + q_vector->rx.rx_ring = rx_ring; + rx_ring->q_vector = q_vector; + rx_ring->next = NULL; + rx_ring->netdev = repr->netdev; } } @@ -390,7 +379,7 @@ static void ice_eswitch_set_rxdid(struct ice_vsi *vsi, u32 rxdid) int i; ice_for_each_rxq(vsi, i) { - struct ice_ring *ring = vsi->rx_rings[i]; + struct ice_rx_ring *ring = vsi->rx_rings[i]; u16 pf_q = vsi->rxq_map[ring->q_index]; ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true); @@ -511,7 +500,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, * context, regular netdev associated with Rx ring is returned. */ struct net_device * -ice_eswitch_get_target_netdev(struct ice_ring *rx_ring, +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { struct ice_32b_rx_flex_desc_nic_2 *desc; diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 23df0d400847..364cd2a79c37 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -22,7 +22,7 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi); void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); struct net_device * -ice_eswitch_get_target_netdev(struct ice_ring *rx_ring, +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc); void ice_eswitch_set_target_vsi(struct sk_buff *skb, @@ -68,7 +68,7 @@ static inline bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf) } static inline struct net_device * -ice_eswitch_get_target_netdev(struct ice_ring *rx_ring, +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { return rx_ring->netdev; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index ecaf5c05eaf6..19ae045b7242 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -603,7 +603,7 @@ static bool ice_lbtest_check_frame(u8 *frame) * * Function sends loopback packets on a test Tx ring. */ -static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size) +static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size) { struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; @@ -656,7 +656,7 @@ static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size) * Function receives loopback packets and verify their correctness. * Returns number of received valid frames. */ -static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) +static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) { struct ice_rx_buf *rx_buf; int valid_frames, i; @@ -695,9 +695,10 @@ static u64 ice_loopback_test(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *orig_vsi = np->vsi, *test_vsi; struct ice_pf *pf = orig_vsi->back; - struct ice_ring *tx_ring, *rx_ring; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; struct device *dev; u8 *tx_frame; int i; @@ -1350,7 +1351,8 @@ ice_get_ethtool_stats(struct net_device *netdev, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); struct ice_pf *pf = vsi->back; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; unsigned int j; int i = 0; char *p; @@ -1371,10 +1373,10 @@ ice_get_ethtool_stats(struct net_device *netdev, rcu_read_lock(); ice_for_each_alloc_txq(vsi, j) { - ring = READ_ONCE(vsi->tx_rings[j]); - if (ring) { - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + tx_ring = READ_ONCE(vsi->tx_rings[j]); + if (tx_ring) { + data[i++] = tx_ring->stats.pkts; + data[i++] = tx_ring->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -1382,10 +1384,10 @@ ice_get_ethtool_stats(struct net_device *netdev, } ice_for_each_alloc_rxq(vsi, j) { - ring = READ_ONCE(vsi->rx_rings[j]); - if (ring) { - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + rx_ring = READ_ONCE(vsi->rx_rings[j]); + if (rx_ring) { + data[i++] = rx_ring->stats.pkts; + data[i++] = rx_ring->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -2702,9 +2704,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) static int ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { - struct ice_ring *tx_rings = NULL, *rx_rings = NULL; struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_ring *xdp_rings = NULL; + struct ice_tx_ring *xdp_rings = NULL; + struct ice_tx_ring *tx_rings = NULL; + struct ice_rx_ring *rx_rings = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; @@ -3290,7 +3293,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.ring && q_vector->tx.ring) + if (q_vector->rx.rx_ring && q_vector->tx.tx_ring) combined++; } @@ -3515,21 +3518,21 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) static int ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc) { - if (!rc->ring) + if (!rc->rx_ring) return -EINVAL; switch (rc->type) { case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); ec->rx_coalesce_usecs = rc->itr_setting; - ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl; + ec->rx_coalesce_usecs_high = rc->rx_ring->q_vector->intrl; break; case ICE_TX_CONTAINER: ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc); ec->tx_coalesce_usecs = rc->itr_setting; break; default: - dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", rc->type); + dev_dbg(ice_pf_to_dev(rc->rx_ring->vsi->back), "Invalid c_type %d\n", rc->type); return -EINVAL; } @@ -3632,7 +3635,7 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, struct ice_pf *pf = vsi->back; u16 itr_setting; - if (!rc->ring) + if (!rc->rx_ring) return -EINVAL; switch (rc->type) { @@ -3645,15 +3648,15 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl && + if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl && (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", c_type_str); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { - rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - ice_write_intrl(rc->ring->q_vector, + if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) { + rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high; + ice_write_intrl(rc->rx_ring->q_vector, ec->rx_coalesce_usecs_high); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 472c4525e984..04155139125c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -390,12 +390,12 @@ static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data) { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - if (!q_vector->tx.ring) + if (!q_vector->tx.tx_ring) return IRQ_HANDLED; #define FDIR_RX_DESC_CLEAN_BUDGET 64 - ice_clean_rx_irq(q_vector->rx.ring, FDIR_RX_DESC_CLEAN_BUDGET); - ice_clean_ctrl_tx_irq(q_vector->tx.ring); + ice_clean_rx_irq(q_vector->rx.rx_ring, FDIR_RX_DESC_CLEAN_BUDGET); + ice_clean_ctrl_tx_irq(q_vector->tx.tx_ring); return IRQ_HANDLED; } @@ -409,7 +409,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - if (!q_vector->tx.ring && !q_vector->rx.ring) + if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; q_vector->total_events++; @@ -425,7 +425,7 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d struct ice_pf *pf = q_vector->vsi->back; int i; - if (!q_vector->tx.ring && !q_vector->rx.ring) + if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; ice_for_each_vf(pf, i) @@ -1291,8 +1291,8 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[i]; if (q_vector) { - q_vector->tx.ring = NULL; - q_vector->rx.ring = NULL; + q_vector->tx.tx_ring = NULL; + q_vector->rx.rx_ring = NULL; } } } @@ -1328,7 +1328,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ for (i = 0; i < vsi->alloc_txq; i++) { - struct ice_ring *ring; + struct ice_tx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1347,7 +1347,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) /* Allocate Rx rings */ for (i = 0; i < vsi->alloc_rxq; i++) { - struct ice_ring *ring; + struct ice_rx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1750,7 +1750,7 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]); } -int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx) +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx) { struct ice_aqc_add_tx_qgrp *qg_buf; int err; @@ -1806,7 +1806,7 @@ setup_rings: * Configure the Tx VSI for operation. */ static int -ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count) { struct ice_aqc_add_tx_qgrp *qg_buf; u16 q_idx = 0; @@ -1858,7 +1858,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) return ret; for (i = 0; i < vsi->num_xdp_txq; i++) - vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(vsi->xdp_rings[i]); + vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); return ret; } @@ -1893,6 +1893,23 @@ void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl) ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25)); } +static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc) +{ + switch (rc->type) { + case ICE_RX_CONTAINER: + if (rc->rx_ring) + return rc->rx_ring->q_vector; + break; + case ICE_TX_CONTAINER: + if (rc->tx_ring) + return rc->tx_ring->q_vector; + default: + break; + } + + return NULL; +} + /** * __ice_write_itr - write throttle rate to register * @q_vector: pointer to interrupt data structure @@ -1917,11 +1934,10 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) { struct ice_q_vector *q_vector; - if (!rc->ring) + q_vector = ice_pull_qvec_from_rc(rc); + if (!q_vector) return; - q_vector = rc->ring->q_vector; - __ice_write_itr(q_vector, rc, itr); } @@ -2097,7 +2113,7 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi) */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings, u16 count) + u16 rel_vmvf_num, struct ice_tx_ring **rings, u16 count) { u16 q_idx; @@ -3396,16 +3412,16 @@ out: /** * ice_update_ring_stats - Update ring statistics - * @ring: ring to update + * @stats: stats to be updated * @pkts: number of processed packets * @bytes: number of processed bytes * * This function assumes that caller has acquired a u64_stats_sync lock. */ -static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes) { - ring->stats.bytes += bytes; - ring->stats.pkts += pkts; + stats->bytes += bytes; + stats->pkts += pkts; } /** @@ -3414,10 +3430,10 @@ static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) * @pkts: number of processed packets * @bytes: number of processed bytes */ -void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) +void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(tx_ring, pkts, bytes); + ice_update_ring_stats(&tx_ring->stats, pkts, bytes); u64_stats_update_end(&tx_ring->syncp); } @@ -3427,10 +3443,10 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) * @pkts: number of processed packets * @bytes: number of processed bytes */ -void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) +void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(rx_ring, pkts, bytes); + ice_update_ring_stats(&rx_ring->stats, pkts, bytes); u64_stats_update_end(&rx_ring->syncp); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 193f96305407..b6c429c5875d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -14,7 +14,7 @@ void ice_update_eth_stats(struct ice_vsi *vsi); int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx); -int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx); +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx); int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); @@ -93,9 +93,9 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); -void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); +void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes); -void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); +void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index da91d9f9128c..e4c12a87785c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -62,7 +62,7 @@ bool netif_is_ice(struct net_device *dev) * ice_get_tx_pending - returns number of Tx descriptors not processed * @ring: the ring of descriptors */ -static u16 ice_get_tx_pending(struct ice_ring *ring) +static u16 ice_get_tx_pending(struct ice_tx_ring *ring) { u16 head, tail; @@ -102,7 +102,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) hw = &vsi->back->hw; for (i = 0; i < vsi->num_txq; i++) { - struct ice_ring *tx_ring = vsi->tx_rings[i]; + struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; if (tx_ring && tx_ring->desc) { /* If packet counter has not changed the queue is @@ -2307,14 +2307,14 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_num = pf->msix_entries[base + vector].vector; - if (q_vector->tx.ring && q_vector->rx.ring) { + if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); tx_int_idx++; - } else if (q_vector->rx.ring) { + } else if (q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "rx", rx_int_idx++); - } else if (q_vector->tx.ring) { + } else if (q_vector->tx.tx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "tx", tx_int_idx++); } else { @@ -2376,7 +2376,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) for (i = 0; i < vsi->num_xdp_txq; i++) { u16 xdp_q_idx = vsi->alloc_txq + i; - struct ice_ring *xdp_ring; + struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); @@ -2393,7 +2393,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); + xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); } return 0; @@ -2472,11 +2472,11 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) q_base = vsi->num_xdp_txq - xdp_rings_rem; for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_id]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id]; xdp_ring->q_vector = q_vector; - xdp_ring->next = q_vector->tx.ring; - q_vector->tx.ring = xdp_ring; + xdp_ring->next = q_vector->tx.tx_ring; + q_vector->tx.tx_ring = xdp_ring; } xdp_rings_rem -= xdp_rings_per_v; } @@ -2546,14 +2546,14 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_ring *ring; + struct ice_tx_ring *ring; - ice_for_each_ring(ring, q_vector->tx) + ice_for_each_tx_ring(ring, q_vector->tx) if (!ring->tx_buf || !ice_ring_is_xdp(ring)) break; /* restore the value of last node prior to XDP setup */ - q_vector->tx.ring = ring; + q_vector->tx.tx_ring = ring; } free_qmap: @@ -2602,7 +2602,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) int i; ice_for_each_rxq(vsi, i) { - struct ice_ring *rx_ring = vsi->rx_rings[i]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; if (rx_ring->xsk_pool) napi_schedule(&rx_ring->q_vector->napi); @@ -5571,7 +5571,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_enable(&q_vector->napi); } } @@ -5631,7 +5631,8 @@ int ice_up(struct ice_vsi *vsi) /** * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring - * @ring: Tx or Rx ring to read stats from + * @syncp: pointer to u64_stats_sync + * @stats: stats that pkts and bytes count will be taken from * @pkts: packets stats counter * @bytes: bytes stats counter * @@ -5639,19 +5640,16 @@ int ice_up(struct ice_vsi *vsi) * that needs to be performed to read u64 values in 32 bit machine. */ static void -ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) +ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats stats, + u64 *pkts, u64 *bytes) { unsigned int start; - *pkts = 0; - *bytes = 0; - if (!ring) - return; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); - *pkts = ring->stats.pkts; - *bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + start = u64_stats_fetch_begin_irq(syncp); + *pkts = stats.pkts; + *bytes = stats.bytes; + } while (u64_stats_fetch_retry_irq(syncp, start)); } /** @@ -5661,18 +5659,19 @@ ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) * @count: number of rings */ static void -ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings, +ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count) { struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; u16 i; for (i = 0; i < count; i++) { - struct ice_ring *ring; - u64 pkts, bytes; + struct ice_tx_ring *ring; + u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + if (ring) + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->tx_stats.restart_q; @@ -5711,9 +5710,9 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { - struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); - ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; @@ -5977,7 +5976,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_disable(&q_vector->napi); cancel_work_sync(&q_vector->tx.dim.work); @@ -6062,7 +6061,7 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) } ice_for_each_txq(vsi, i) { - struct ice_ring *ring = vsi->tx_rings[i]; + struct ice_tx_ring *ring = vsi->tx_rings[i]; if (!ring) return -EINVAL; @@ -6094,7 +6093,7 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) } ice_for_each_rxq(vsi, i) { - struct ice_ring *ring = vsi->rx_rings[i]; + struct ice_rx_ring *ring = vsi->rx_rings[i]; if (!ring) return -EINVAL; @@ -6998,7 +6997,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_ring *tx_ring = NULL; + struct ice_tx_ring *tx_ring = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; u32 i; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index f3884dc5524e..a1be0d04a2d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1287,7 +1287,7 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) * The timestamp is in ns, so we must convert the result first. */ void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { u32 ts_high; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 1b9aab7de319..f71ad317d6c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -180,7 +180,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_process_ts(struct ice_pf *pf); void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); @@ -208,7 +208,7 @@ ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) static inline void ice_ptp_process_ts(struct ice_pf *pf) { } static inline void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h index 9bc0b8fdfc77..cf685247c07a 100644 --- a/drivers/net/ethernet/intel/ice/ice_trace.h +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -64,15 +64,15 @@ DECLARE_EVENT_CLASS(ice_rx_dim_template, TP_ARGS(q_vector, dim), TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) __field(struct dim *, dim) - __string(devname, q_vector->rx.ring->netdev->name)), + __string(devname, q_vector->rx.rx_ring->netdev->name)), TP_fast_assign(__entry->q_vector = q_vector; __entry->dim = dim; - __assign_str(devname, q_vector->rx.ring->netdev->name);), + __assign_str(devname, q_vector->rx.rx_ring->netdev->name);), TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", __get_str(devname), - __entry->q_vector->rx.ring->q_index, + __entry->q_vector->rx.rx_ring->q_index, __entry->dim->state, __entry->dim->profile_ix, __entry->dim->tune_state, @@ -91,15 +91,15 @@ DECLARE_EVENT_CLASS(ice_tx_dim_template, TP_ARGS(q_vector, dim), TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) __field(struct dim *, dim) - __string(devname, q_vector->tx.ring->netdev->name)), + __string(devname, q_vector->tx.tx_ring->netdev->name)), TP_fast_assign(__entry->q_vector = q_vector; __entry->dim = dim; - __assign_str(devname, q_vector->tx.ring->netdev->name);), + __assign_str(devname, q_vector->tx.tx_ring->netdev->name);), TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", __get_str(devname), - __entry->q_vector->tx.ring->q_index, + __entry->q_vector->tx.tx_ring->q_index, __entry->dim->state, __entry->dim->profile_ix, __entry->dim->tune_state, @@ -115,7 +115,7 @@ DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work, /* Events related to a vsi & ring */ DECLARE_EVENT_CLASS(ice_tx_template, - TP_PROTO(struct ice_ring *ring, struct ice_tx_desc *desc, + TP_PROTO(struct ice_tx_ring *ring, struct ice_tx_desc *desc, struct ice_tx_buf *buf), TP_ARGS(ring, desc, buf), @@ -135,7 +135,7 @@ DECLARE_EVENT_CLASS(ice_tx_template, #define DEFINE_TX_TEMPLATE_OP_EVENT(name) \ DEFINE_EVENT(ice_tx_template, name, \ - TP_PROTO(struct ice_ring *ring, \ + TP_PROTO(struct ice_tx_ring *ring, \ struct ice_tx_desc *desc, \ struct ice_tx_buf *buf), \ TP_ARGS(ring, desc, buf)) @@ -145,7 +145,7 @@ DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap); DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop); DECLARE_EVENT_CLASS(ice_rx_template, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc), TP_ARGS(ring, desc), @@ -161,12 +161,12 @@ DECLARE_EVENT_CLASS(ice_rx_template, __entry->ring, __entry->desc) ); DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc), TP_ARGS(ring, desc) ); DECLARE_EVENT_CLASS(ice_rx_indicate_template, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb), @@ -186,13 +186,13 @@ DECLARE_EVENT_CLASS(ice_rx_indicate_template, ); DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb) ); DECLARE_EVENT_CLASS(ice_xmit_template, - TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), + TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), TP_ARGS(ring, skb), @@ -210,7 +210,7 @@ DECLARE_EVENT_CLASS(ice_xmit_template, #define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \ DEFINE_EVENT(ice_xmit_template, name, \ - TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), \ + TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), \ TP_ARGS(ring, skb)) DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 4da9420df1b0..b031f754c5cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -34,7 +34,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, struct ice_tx_buf *tx_buf, *first; struct ice_fltr_desc *f_desc; struct ice_tx_desc *tx_desc; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; struct device *dev; dma_addr_t dma; u32 td_cmd; @@ -108,7 +108,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, * @tx_buf: the buffer to free */ static void -ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) +ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { if (tx_buf->skb) { if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) @@ -135,7 +135,7 @@ ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) /* tx_buf must be completely set up in the transmit path */ } -static struct netdev_queue *txring_txq(const struct ice_ring *ring) +static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->q_index); } @@ -144,8 +144,9 @@ static struct netdev_queue *txring_txq(const struct ice_ring *ring) * ice_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned */ -void ice_clean_tx_ring(struct ice_ring *tx_ring) +void ice_clean_tx_ring(struct ice_tx_ring *tx_ring) { + u32 size; u16 i; if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) { @@ -164,8 +165,10 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) tx_skip_free: memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count); + size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); + memset(tx_ring->desc, 0, size); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -183,14 +186,18 @@ tx_skip_free: * * Free all transmit software resources */ -void ice_free_tx_ring(struct ice_ring *tx_ring) +void ice_free_tx_ring(struct ice_tx_ring *tx_ring) { + u32 size; + ice_clean_tx_ring(tx_ring); devm_kfree(tx_ring->dev, tx_ring->tx_buf); tx_ring->tx_buf = NULL; if (tx_ring->desc) { - dmam_free_coherent(tx_ring->dev, tx_ring->size, + size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); + dmam_free_coherent(tx_ring->dev, size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; } @@ -203,7 +210,7 @@ void ice_free_tx_ring(struct ice_ring *tx_ring) * * Returns true if there's any budget left (e.g. the clean is finished) */ -static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) +static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) { unsigned int total_bytes = 0, total_pkts = 0; unsigned int budget = ICE_DFLT_IRQ_WORK; @@ -331,9 +338,10 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) * * Return 0 on success, negative on error */ -int ice_setup_tx_ring(struct ice_ring *tx_ring) +int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) { struct device *dev = tx_ring->dev; + u32 size; if (!dev) return -ENOMEM; @@ -347,13 +355,13 @@ int ice_setup_tx_ring(struct ice_ring *tx_ring) return -ENOMEM; /* round up to nearest page */ - tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), - PAGE_SIZE); - tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, + size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); + tx_ring->desc = dmam_alloc_coherent(dev, size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", - tx_ring->size); + size); goto err; } @@ -372,9 +380,10 @@ err: * ice_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned */ -void ice_clean_rx_ring(struct ice_ring *rx_ring) +void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { struct device *dev = rx_ring->dev; + u32 size; u16 i; /* ring already cleared, nothing to do */ @@ -419,7 +428,9 @@ rx_skip_free: memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + memset(rx_ring->desc, 0, size); rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; @@ -432,8 +443,10 @@ rx_skip_free: * * Free all receive software resources */ -void ice_free_rx_ring(struct ice_ring *rx_ring) +void ice_free_rx_ring(struct ice_rx_ring *rx_ring) { + u32 size; + ice_clean_rx_ring(rx_ring); if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) @@ -443,7 +456,9 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) rx_ring->rx_buf = NULL; if (rx_ring->desc) { - dmam_free_coherent(rx_ring->dev, rx_ring->size, + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + dmam_free_coherent(rx_ring->dev, size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } @@ -455,9 +470,10 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) * * Return 0 on success, negative on error */ -int ice_setup_rx_ring(struct ice_ring *rx_ring) +int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) { struct device *dev = rx_ring->dev; + u32 size; if (!dev) return -ENOMEM; @@ -471,13 +487,13 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) return -ENOMEM; /* round up to nearest page */ - rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), - PAGE_SIZE); - rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + rx_ring->desc = dmam_alloc_coherent(dev, size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", - rx_ring->size); + size); goto err; } @@ -501,7 +517,7 @@ err: } static unsigned int -ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size) +ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size) { unsigned int truesize; @@ -525,10 +541,10 @@ ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int -ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, +ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - struct ice_ring *xdp_ring; + struct ice_tx_ring *xdp_ring; int err, result; u32 act; @@ -578,7 +594,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct ice_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); struct ice_vsi *vsi = np->vsi; - struct ice_ring *xdp_ring; + struct ice_tx_ring *xdp_ring; int nxmit = 0, i; if (test_bit(ICE_VSI_DOWN, vsi->state)) @@ -616,7 +632,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, * reused. */ static bool -ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) +ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) { struct page *page = bi->page; dma_addr_t dma; @@ -667,7 +683,7 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) * buffers. Then bump tail at most one time. Grouping like this lets us avoid * multiple tail writes per call. */ -bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) +bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; @@ -796,7 +812,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) * The function will then update the page offset. */ static void -ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct sk_buff *skb, unsigned int size) { #if (PAGE_SIZE >= 8192) @@ -822,7 +838,7 @@ ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * Synchronizes page for reuse by the adapter */ static void -ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) +ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) { u16 nta = rx_ring->next_to_alloc; struct ice_rx_buf *new_buf; @@ -853,7 +869,7 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) * for use by the CPU. */ static struct ice_rx_buf * -ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size, +ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, int *rx_buf_pgcnt) { struct ice_rx_buf *rx_buf; @@ -890,7 +906,7 @@ ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size, * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff * -ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { u8 metasize = xdp->data - xdp->data_meta; @@ -942,7 +958,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * skb correctly. */ static struct sk_buff * -ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; @@ -1002,7 +1018,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * the associated resources. */ static void -ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) { u16 ntc = rx_ring->next_to_clean + 1; @@ -1038,7 +1054,7 @@ ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * otherwise return true indicating that this is in fact a non-EOP buffer. */ static bool -ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) +ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) @@ -1062,7 +1078,7 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) * * Returns amount of work completed */ -int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) +int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); @@ -1249,9 +1265,9 @@ static void ice_net_dim(struct ice_q_vector *q_vector) if (ITR_IS_DYNAMIC(tx)) { struct dim_sample dim_sample = {}; u64 packets = 0, bytes = 0; - struct ice_ring *ring; + struct ice_tx_ring *ring; - ice_for_each_ring(ring, q_vector->tx) { + ice_for_each_tx_ring(ring, q_vector->tx) { packets += ring->stats.pkts; bytes += ring->stats.bytes; } @@ -1265,9 +1281,9 @@ static void ice_net_dim(struct ice_q_vector *q_vector) if (ITR_IS_DYNAMIC(rx)) { struct dim_sample dim_sample = {}; u64 packets = 0, bytes = 0; - struct ice_ring *ring; + struct ice_rx_ring *ring; - ice_for_each_ring(ring, q_vector->rx) { + ice_for_each_rx_ring(ring, q_vector->rx) { packets += ring->stats.pkts; bytes += ring->stats.bytes; } @@ -1389,18 +1405,19 @@ int ice_napi_poll(struct napi_struct *napi, int budget) { struct ice_q_vector *q_vector = container_of(napi, struct ice_q_vector, napi); + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; bool clean_complete = true; - struct ice_ring *ring; int budget_per_ring; int work_done = 0; /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ - ice_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_pool ? - ice_clean_tx_irq_zc(ring, budget) : - ice_clean_tx_irq(ring, budget); + ice_for_each_tx_ring(tx_ring, q_vector->tx) { + bool wd = tx_ring->xsk_pool ? + ice_clean_tx_irq_zc(tx_ring, budget) : + ice_clean_tx_irq(tx_ring, budget); if (!wd) clean_complete = false; @@ -1421,16 +1438,16 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Max of 1 Rx ring in this q_vector so give it the budget */ budget_per_ring = budget; - ice_for_each_ring(ring, q_vector->rx) { + ice_for_each_rx_ring(rx_ring, q_vector->rx) { int cleaned; /* A dedicated path for zero-copy allows making a single * comparison in the irq context instead of many inside the * ice_clean_rx_irq function and makes the codebase cleaner. */ - cleaned = ring->xsk_pool ? - ice_clean_rx_irq_zc(ring, budget_per_ring) : - ice_clean_rx_irq(ring, budget_per_ring); + cleaned = rx_ring->xsk_pool ? + ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) @@ -1464,7 +1481,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * * Returns -EBUSY if a stop is needed, else 0 */ -static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index); /* Memory barrier before checking head and tail */ @@ -1487,7 +1504,7 @@ static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) * * Returns 0 if stop is not needed */ -static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) return 0; @@ -1506,7 +1523,7 @@ static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) * it and the length into the transmit descriptor. */ static void -ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, +ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { u64 td_offset, td_tag, td_cmd; @@ -1842,7 +1859,7 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) * related to VLAN tagging for the HW, such as VLAN, DCB, etc. */ static void -ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first) +ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) { struct sk_buff *skb = first->skb; @@ -2148,7 +2165,7 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) * @off: Tx offload parameters */ static void -ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb, +ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { s8 idx; @@ -2183,7 +2200,7 @@ ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb, * Returns NETDEV_TX_OK if sent, else an error code */ static netdev_tx_t -ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) +ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) { struct ice_tx_offload_params offload = { 0 }; struct ice_vsi *vsi = tx_ring->vsi; @@ -2286,7 +2303,7 @@ netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; tx_ring = vsi->tx_rings[skb->queue_mapping]; @@ -2336,7 +2353,7 @@ ice_select_queue(struct net_device *netdev, struct sk_buff *skb, * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue * @tx_ring: tx_ring to clean */ -void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring) +void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring) { struct ice_vsi *vsi = tx_ring->vsi; s16 i = tx_ring->next_to_clean; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index bf40608f9bac..dcbdfe649a09 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -154,7 +154,7 @@ struct ice_tx_buf { struct ice_tx_offload_params { u64 cd_qw1; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; u32 td_cmd; u32 td_offset; u32 td_l2tag1; @@ -251,9 +251,9 @@ enum ice_dynamic_itr { #define ICE_TX_LEGACY 1 /* descriptor ring, associated with a VSI */ -struct ice_ring { +struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ - struct ice_ring *next; /* pointer to next ring in q_vector */ + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ @@ -261,13 +261,13 @@ struct ice_ring { struct ice_q_vector *q_vector; /* Backreference to associated vector */ u8 __iomem *tail; union { - struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; struct xdp_buff **xdp_buf; }; /* CL2 - 2nd cacheline starts here */ + struct xdp_rxq_info xdp_rxq; + /* CL3 - 3rd cacheline starts here */ u16 q_index; /* Queue number of ring */ - u16 q_handle; /* Queue handle per TC */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -276,56 +276,79 @@ struct ice_ring { u16 next_to_use; u16 next_to_clean; u16 next_to_alloc; + u16 rx_offset; + u16 rx_buf_len; + + /* stats structs */ + struct ice_rxq_stats rx_stats; + struct ice_q_stats stats; + struct u64_stats_sync syncp; + + struct rcu_head rcu; /* to avoid race on free */ + /* CL4 - 3rd cacheline starts here */ + struct bpf_prog *xdp_prog; + struct xsk_buff_pool *xsk_pool; + struct sk_buff *skb; + dma_addr_t dma; /* physical address of ring */ +#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) + u64 cached_phctime; + u8 dcb_tc; /* Traffic class of ring */ + u8 ptp_rx; + u8 flags; +} ____cacheline_internodealigned_in_smp; + +struct ice_tx_ring { + /* CL1 - 1st cacheline starts here */ + struct ice_tx_ring *next; /* pointer to next ring in q_vector */ + void *desc; /* Descriptor ring memory */ + struct device *dev; /* Used for DMA mapping */ + u8 __iomem *tail; + struct ice_tx_buf *tx_buf; + struct ice_q_vector *q_vector; /* Backreference to associated vector */ + struct net_device *netdev; /* netdev ring maps to */ + struct ice_vsi *vsi; /* Backreference to associated VSI */ + /* CL2 - 2nd cacheline starts here */ + dma_addr_t dma; /* physical address of ring */ + u16 next_to_use; + u16 next_to_clean; + u16 count; /* Number of descriptors */ + u16 q_index; /* Queue number of ring */ + struct xsk_buff_pool *xsk_pool; /* stats structs */ struct ice_q_stats stats; struct u64_stats_sync syncp; - union { - struct ice_txq_stats tx_stats; - struct ice_rxq_stats rx_stats; - }; + struct ice_txq_stats tx_stats; + /* CL3 - 3rd cacheline starts here */ struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ - struct bpf_prog *xdp_prog; - struct xsk_buff_pool *xsk_pool; - u16 rx_offset; - /* CL3 - 3rd cacheline starts here */ - struct xdp_rxq_info xdp_rxq; - struct sk_buff *skb; - /* CLX - the below items are only accessed infrequently and should be - * in their own cache line if possible - */ -#define ICE_TX_FLAGS_RING_XDP BIT(0) -#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) - u8 flags; - dma_addr_t dma; /* physical address of ring */ - unsigned int size; /* length of descriptor ring in bytes */ - u32 txq_teid; /* Added Tx queue TEID */ - u16 rx_buf_len; - u8 dcb_tc; /* Traffic class of ring */ struct ice_ptp_tx *tx_tstamps; - u64 cached_phctime; - u8 ptp_rx:1; - u8 ptp_tx:1; + u32 txq_teid; /* Added Tx queue TEID */ + u16 q_handle; /* Queue handle per TC */ + u16 reg_idx; /* HW register index of the ring */ +#define ICE_TX_FLAGS_RING_XDP BIT(0) + u8 flags; + u8 dcb_tc; /* Traffic class of ring */ + u8 ptp_tx; } ____cacheline_internodealigned_in_smp; -static inline bool ice_ring_uses_build_skb(struct ice_ring *ring) +static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) { return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); } -static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring) +static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring) { ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; } -static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring) +static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) { ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; } -static inline bool ice_ring_is_xdp(struct ice_ring *ring) +static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) { return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); } @@ -337,7 +360,10 @@ enum ice_container_type { struct ice_ring_container { /* head of linked-list of rings */ - struct ice_ring *ring; + union { + struct ice_rx_ring *rx_ring; + struct ice_tx_ring *tx_ring; + }; struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ /* this matches the maximum number of ITR bits, but in usec @@ -358,10 +384,13 @@ struct ice_coalesce_stored { }; /* iterator for handling rings in ring container */ -#define ice_for_each_ring(pos, head) \ - for (pos = (head).ring; pos; pos = pos->next) +#define ice_for_each_rx_ring(pos, head) \ + for (pos = (head).rx_ring; pos; pos = pos->next) -static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) +#define ice_for_each_tx_ring(pos, head) \ + for (pos = (head).tx_ring; pos; pos = pos->next) + +static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) { #if (PAGE_SIZE < 8192) if (ring->rx_buf_len > (PAGE_SIZE / 2)) @@ -374,21 +403,21 @@ static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) union ice_32b_rx_flex_desc; -bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); +bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); u16 ice_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -void ice_clean_tx_ring(struct ice_ring *tx_ring); -void ice_clean_rx_ring(struct ice_ring *rx_ring); -int ice_setup_tx_ring(struct ice_ring *tx_ring); -int ice_setup_rx_ring(struct ice_ring *rx_ring); -void ice_free_tx_ring(struct ice_ring *tx_ring); -void ice_free_rx_ring(struct ice_ring *rx_ring); +void ice_clean_tx_ring(struct ice_tx_ring *tx_ring); +void ice_clean_rx_ring(struct ice_rx_ring *rx_ring); +int ice_setup_tx_ring(struct ice_tx_ring *tx_ring); +int ice_setup_rx_ring(struct ice_rx_ring *rx_ring); +void ice_free_tx_ring(struct ice_tx_ring *tx_ring); +void ice_free_rx_ring(struct ice_rx_ring *rx_ring); int ice_napi_poll(struct napi_struct *napi, int budget); int ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, u8 *raw_packet); -int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget); -void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring); +int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget); +void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring); #endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index e314a1aee0ff..d6d71f82142f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -9,7 +9,7 @@ * @rx_ring: ring to bump * @val: new head index */ -void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val) +void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) { u16 prev_ntu = rx_ring->next_to_use & ~0x7; @@ -67,7 +67,7 @@ static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) * @rx_ptype: the ptype value from the descriptor */ static void -ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, +ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { struct ice_32b_rx_flex_desc_nic *nic_mdid; @@ -94,7 +94,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, * skb->protocol must be set before this function is called */ static void -ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, +ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { struct ice_rx_ptype_decoded decoded; @@ -179,7 +179,7 @@ checksum_fail: * other fields within the skb. */ void -ice_process_skb_fields(struct ice_ring *rx_ring, +ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 ptype) { @@ -205,7 +205,7 @@ ice_process_skb_fields(struct ice_ring *rx_ring, * gro receive functions (with/without VLAN tag) */ void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) +ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && (vlan_tag & VLAN_VID_MASK)) @@ -219,7 +219,7 @@ ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) * @size: packet data size * @xdp_ring: XDP ring for transmission */ -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; @@ -271,7 +271,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) * * Returns negative on failure, 0 on success. */ -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) { struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); @@ -290,13 +290,13 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) * should be called when a batch of packets has been processed in the * napi loop. */ -void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res) +void ice_finalize_xdp_rx(struct ice_rx_ring *rx_ring, unsigned int xdp_res) { if (xdp_res & ICE_XDP_REDIR) xdp_do_flush_map(); if (xdp_res & ICE_XDP_TX) { - struct ice_ring *xdp_ring = + struct ice_tx_ring *xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; ice_xdp_ring_update_tail(xdp_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 05ac30752902..4e56e8e321a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -37,7 +37,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) * * This function updates the XDP Tx ring tail register. */ -static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) +static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring) { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. @@ -46,14 +46,14 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } -void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); -void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val); +void ice_finalize_xdp_rx(struct ice_rx_ring *xdp_ring, unsigned int xdp_res); +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring); +void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); void -ice_process_skb_fields(struct ice_ring *rx_ring, +ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 ptype); void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); +ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 4d0b643906ff..d90a3b7be713 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -3347,7 +3347,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - struct ice_ring *ring = vsi->tx_rings[vf_q_id]; + struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id]; struct ice_txq_meta txq_meta = { 0 }; if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 7682eaa9a9ec..08ae06d6b982 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -67,7 +67,7 @@ ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, * @q_vector: queue vector */ static void -ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring, +ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, struct ice_q_vector *q_vector) { struct ice_pf *pf = vsi->back; @@ -104,16 +104,17 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; ice_cfg_itr(hw, q_vector); - ice_for_each_ring(ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, + ice_for_each_tx_ring(tx_ring, q_vector->tx) + ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, q_vector->tx.itr_idx); - ice_for_each_ring(ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx, + ice_for_each_rx_ring(rx_ring, q_vector->rx) + ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, q_vector->rx.itr_idx); ice_flush(hw); @@ -144,8 +145,9 @@ static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) { struct ice_txq_meta txq_meta = { }; - struct ice_ring *tx_ring, *rx_ring; struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; int timeout = 50; int err; @@ -171,7 +173,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; if (ice_is_xdp_ena_vsi(vsi)) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(&txq_meta, 0, sizeof(txq_meta)); ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); @@ -201,8 +203,9 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_aqc_add_tx_qgrp *qg_buf; - struct ice_ring *tx_ring, *rx_ring; struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; u16 size; int err; @@ -225,7 +228,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) goto free_buf; if (ice_is_xdp_ena_vsi(vsi)) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(qg_buf, 0, size); qg_buf->num_txqs = 1; @@ -233,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); + xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); } err = ice_vsi_cfg_rxq(rx_ring); @@ -360,7 +363,7 @@ xsk_pool_if_up: * * Returns true if all allocations were successful, false if any fail. */ -bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; @@ -403,7 +406,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring * @rx_ring: Rx ring */ -static void ice_bump_ntc(struct ice_ring *rx_ring) +static void ice_bump_ntc(struct ice_rx_ring *rx_ring) { int ntc = rx_ring->next_to_clean + 1; @@ -422,7 +425,7 @@ static void ice_bump_ntc(struct ice_ring *rx_ring) * Returns the skb on success, NULL on failure. */ static struct sk_buff * -ice_construct_skb_zc(struct ice_ring *rx_ring, struct xdp_buff **xdp_arr) +ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) { struct xdp_buff *xdp = *xdp_arr; unsigned int metasize = xdp->data - xdp->data_meta; @@ -453,11 +456,11 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct xdp_buff **xdp_arr) * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int -ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) +ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { int err, result = ICE_XDP_PASS; + struct ice_tx_ring *xdp_ring; struct bpf_prog *xdp_prog; - struct ice_ring *xdp_ring; u32 act; /* ZC patch is enabled only when XDP program is set, @@ -505,7 +508,7 @@ out_failure: * * Returns number of processed packets on success, remaining budget on failure. */ -int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); @@ -612,7 +615,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) * * Returns true if cleanup/transmission is done. */ -static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) +static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) { struct ice_tx_desc *tx_desc = NULL; bool work_done = true; @@ -663,7 +666,7 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) * @tx_buf: Tx buffer to clean */ static void -ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) { xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), @@ -678,7 +681,7 @@ ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) * * Returns true if cleanup/tranmission is done. */ -bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) +bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) { int total_packets = 0, total_bytes = 0; s16 ntc = xdp_ring->next_to_clean; @@ -751,7 +754,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_q_vector *q_vector; struct ice_vsi *vsi = np->vsi; - struct ice_ring *ring; + struct ice_tx_ring *ring; if (test_bit(ICE_DOWN, vsi->state)) return -ENETDOWN; @@ -802,7 +805,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring * @rx_ring: ring to be cleaned */ -void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) +void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { u16 i; @@ -820,7 +823,7 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues * @xdp_ring: XDP_Tx ring */ -void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) +void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; u32 xsk_frames = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index ea208808623a..4c7bd8e9dfc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -11,13 +11,13 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); -void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring); -void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring); +void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); +void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); #else static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, @@ -28,21 +28,21 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, } static inline int -ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring, +ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, int __always_unused budget) { return 0; } static inline bool -ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring, +ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, int __always_unused budget) { return false; } static inline bool -ice_alloc_rx_bufs_zc(struct ice_ring __always_unused *rx_ring, +ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { return false; @@ -60,7 +60,7 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } -static inline void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) { } -static inline void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) { } +static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { } +static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { } #endif /* CONFIG_XDP_SOCKETS */ #endif /* !_ICE_XSK_H_ */ From 0bb4f9ecadd463db28d8b1820bb1bcb96af009d2 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 13:59:59 +0200 Subject: [PATCH 230/440] ice: unify xdp_rings accesses There has been a long lasting issue of improper xdp_rings indexing for XDP_TX and XDP_REDIRECT actions. Given that currently rx_ring->q_index is mixed with smp_processor_id(), there could be a situation where Tx descriptors are produced onto XDP Tx ring, but tail is never bumped - for example pin a particular queue id to non-matching IRQ line. Address this problem by ignoring the user ring count setting and always initialize the xdp_rings array to be of num_possible_cpus() size. Then, always use the smp_processor_id() as an index to xdp_rings array. This provides serialization as at given time only a single softirq can run on a particular CPU. Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 04155139125c..58fad5f82076 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3215,7 +3215,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_map_rings_to_vectors(vsi); if (ice_is_xdp_ena_vsi(vsi)) { - vsi->num_xdp_txq = vsi->alloc_rxq; + vsi->num_xdp_txq = num_possible_cpus(); ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); if (ret) goto err_vectors; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e4c12a87785c..856ae223bf4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2638,7 +2638,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } if (!ice_is_xdp_ena_vsi(vsi) && prog) { - vsi->num_xdp_txq = vsi->alloc_rxq; + vsi->num_xdp_txq = num_possible_cpus(); xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index d6d71f82142f..bc64610df7cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -297,7 +297,7 @@ void ice_finalize_xdp_rx(struct ice_rx_ring *rx_ring, unsigned int xdp_res) if (xdp_res & ICE_XDP_TX) { struct ice_tx_ring *xdp_ring = - rx_ring->vsi->xdp_rings[rx_ring->q_index]; + rx_ring->vsi->xdp_rings[smp_processor_id()]; ice_xdp_ring_update_tail(xdp_ring); } From a55e16fa330acc15f35ee6a6c2aaaea4343c6c76 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 14:00:00 +0200 Subject: [PATCH 231/440] ice: do not create xdp_frame on XDP_TX xdp_frame is not needed for XDP_TX data path in ice driver case. For this data path cleaning of sent descriptor will not happen anywhere outside of the driver, which means that carrying the information about the underlying memory model via xdp_frame will not be used. Therefore, this conversion can be simply dropped, which would relieve CPU a bit. Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b031f754c5cb..7739cd379052 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -554,7 +554,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, return ICE_XDP_PASS; case XDP_TX: xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; - result = ice_xmit_xdp_buff(xdp, xdp_ring); + result = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); if (result == ICE_XDP_CONSUMED) goto out_failure; return result; From eb087cd828648d5322954c86c3e18b2fc98b5700 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 14:00:01 +0200 Subject: [PATCH 232/440] ice: propagate xdp_ring onto rx_ring With rings being split, it is now convenient to introduce a pointer to XDP ring within the Rx ring. For XDP_TX workloads this means that xdp_rings array access will be skipped, which was executed per each processed frame. Also, read the XDP prog once per NAPI and if prog is present, set up the local xdp_ring pointer. Reading prog a single time was discussed in [1] with some concern raised by Toke around dispatcher handling and having the need for going through the RCU grace period in the ndo_bpf driver callback, but ice currently is torning down NAPI instances regardless of the prog presence on VSI. Although the pointer to XDP ring introduced to Rx ring makes things a lot slimmer/simpler, I still feel that single prog read per NAPI lifetime is beneficial. Further patch that will introduce the fallback path will also get a profit from that as xdp_ring pointer will be set during the XDP rings setup. [1]: https://lore.kernel.org/bpf/87k0oseo6e.fsf@toke.dk/ Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ drivers/net/ethernet/intel/ice/ice_txrx.c | 23 +++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 10 +++----- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 25 +++++++++++-------- 6 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 856ae223bf4c..366602c973b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2396,6 +2396,9 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); } + ice_for_each_rxq(vsi, i) + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; + return 0; free_xdp_rings: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 7739cd379052..b4aaed34f10a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -537,15 +537,15 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused s * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run + * @xdp_ring: ring to be used for XDP_TX action * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) { - struct ice_tx_ring *xdp_ring; - int err, result; + int err; u32 act; act = bpf_prog_run_xdp(xdp_prog, xdp); @@ -553,11 +553,10 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: return ICE_XDP_PASS; case XDP_TX: - xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; - result = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); - if (result == ICE_XDP_CONSUMED) + err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); + if (err == ICE_XDP_CONSUMED) goto out_failure; - return result; + return err; case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) @@ -1083,6 +1082,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); unsigned int offset = rx_ring->rx_offset; + struct ice_tx_ring *xdp_ring = NULL; unsigned int xdp_res, xdp_xmit = 0; struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; @@ -1095,6 +1095,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) #endif xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (xdp_prog) + xdp_ring = rx_ring->xdp_ring; + /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; @@ -1158,11 +1162,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) goto construct_skb; - xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); + xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring); if (!xdp_res) goto construct_skb; if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { @@ -1239,7 +1242,7 @@ construct_skb: failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); if (xdp_prog) - ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index dcbdfe649a09..732e0dd05655 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -287,6 +287,7 @@ struct ice_rx_ring { struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ struct bpf_prog *xdp_prog; + struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; struct sk_buff *skb; dma_addr_t dma; /* physical address of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index bc64610df7cb..5c2319e0c66d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -283,22 +283,18 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) /** * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map - * @rx_ring: Rx ring + * @xdp_ring: XDP ring * @xdp_res: Result of the receive batch * * This function bumps XDP Tx tail and/or flush redirect map, and * should be called when a batch of packets has been processed in the * napi loop. */ -void ice_finalize_xdp_rx(struct ice_rx_ring *rx_ring, unsigned int xdp_res) +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res) { if (xdp_res & ICE_XDP_REDIR) xdp_do_flush_map(); - if (xdp_res & ICE_XDP_TX) { - struct ice_tx_ring *xdp_ring = - rx_ring->vsi->xdp_rings[smp_processor_id()]; - + if (xdp_res & ICE_XDP_TX) ice_xdp_ring_update_tail(xdp_ring); - } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 4e56e8e321a8..11b6c1601986 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -46,7 +46,7 @@ static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring) writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } -void ice_finalize_xdp_rx(struct ice_rx_ring *xdp_ring, unsigned int xdp_res); +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res); int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring); void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 08ae06d6b982..d9dfcfc2c6f9 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -452,22 +452,18 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) * ice_run_xdp_zc - Executes an XDP program in zero-copy path * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program + * @xdp_prog: XDP program to run + * @xdp_ring: ring to be used for XDP_TX action * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int -ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) +ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) { int err, result = ICE_XDP_PASS; - struct ice_tx_ring *xdp_ring; - struct bpf_prog *xdp_prog; u32 act; - /* ZC patch is enabled only when XDP program is set, - * so here it can not be NULL - */ - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - act = bpf_prog_run_xdp(xdp_prog, xdp); if (likely(act == XDP_REDIRECT)) { @@ -481,7 +477,6 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) case XDP_PASS: break; case XDP_TX: - xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; result = ice_xmit_xdp_buff(xdp, xdp_ring); if (result == ICE_XDP_CONSUMED) goto out_failure; @@ -512,9 +507,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; + struct bpf_prog *xdp_prog; bool failure = false; + /* ZC patch is enabled only when XDP program is set, + * so here it can not be NULL + */ + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + xdp_ring = rx_ring->xdp_ring; + while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; @@ -545,7 +548,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) xsk_buff_set_size(*xdp, size); xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, *xdp); + xdp_res = ice_run_xdp_zc(rx_ring, *xdp, xdp_prog, xdp_ring); if (xdp_res) { if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) xdp_xmit |= xdp_res; @@ -593,7 +596,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (cleaned_count >= ICE_RX_BUF_WRITE) failure = !ice_alloc_rx_bufs_zc(rx_ring, cleaned_count); - ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { From 9610bd988df9882ccbd7a617e71fe303cddad591 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 14:00:02 +0200 Subject: [PATCH 233/440] ice: optimize XDP_TX workloads Optimize Tx descriptor cleaning for XDP. Current approach doesn't really scale and chokes when multiple flows are handled. Introduce two ring fields, @next_dd and @next_rs that will keep track of descriptor that should be looked at when the need for cleaning arise and the descriptor that should have the RS bit set, respectively. Note that at this point the threshold is a constant (32), but it is something that we could make configurable. First thing is to get away from setting RS bit on each descriptor. Let's do this only once NTU is higher than the currently @next_rs value. In such case, grab the tx_desc[next_rs], set the RS bit in descriptor and advance the @next_rs by a 32. Second thing is to clean the Tx ring only when there are less than 32 free entries. For that case, look up the tx_desc[next_dd] for a DD bit. This bit is written back by HW to let the driver know that xmit was successful. It will happen only for those descriptors that had RS bit set. Clean only 32 descriptors and advance the DD bit. Actual cleaning routine is moved from ice_napi_poll() down to the ice_xmit_xdp_ring(). It is safe to do so as XDP ring will not get any SKBs in there that would rely on interrupts for the cleaning. Nice side effect is that for rare case of Tx fallback path (that next patch is going to introduce) we don't have to trigger the SW irq to clean the ring. With those two concepts, ring is kept at being almost full, but it is guaranteed that driver will be able to produce Tx descriptors. This approach seems to work out well even though the Tx descriptors are produced in one-by-one manner. Test was conducted with the ice HW bombarded with packets from HW generator, configured to generate 30 flows. Xdp2 sample yields the following results: proto 17: 79973066 pkt/s proto 17: 80018911 pkt/s proto 17: 80004654 pkt/s proto 17: 79992395 pkt/s proto 17: 79975162 pkt/s proto 17: 79955054 pkt/s proto 17: 79869168 pkt/s proto 17: 79823947 pkt/s proto 17: 79636971 pkt/s As that sample reports the Rx'ed frames, let's look at sar output. It says that what we Rx'ed we do actually Tx, no noticeable drops. Average: IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil Average: ens4f1 79842324.00 79842310.40 4678261.17 4678260.38 0.00 0.00 0.00 38.32 with tx_busy staying calm. When compared to a state before: Average: IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil Average: ens4f1 90919711.60 42233822.60 5327326.85 2474638.04 0.00 0.00 0.00 43.64 it can be observed that the amount of txpck/s is almost doubled, meaning that the performance is improved by around 90%. All of this due to the drops in the driver, previously the tx_busy stat was bumped at a 7mpps rate. Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 9 ++- drivers/net/ethernet/intel/ice/ice_txrx.c | 21 +++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 10 ++- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 73 ++++++++++++++++--- 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 366602c973b7..214eebb7bbd8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2372,7 +2372,8 @@ free_q_irqs: static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); - int i; + struct ice_tx_desc *tx_desc; + int i, j; for (i = 0; i < vsi->num_xdp_txq; i++) { u16 xdp_q_idx = vsi->alloc_txq + i; @@ -2387,6 +2388,8 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; + xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); @@ -2394,6 +2397,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + for (j = 0; j < xdp_ring->count; j++) { + tx_desc = ICE_TX_DESC(xdp_ring, j); + tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); + } } ice_for_each_rxq(vsi, i) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b4aaed34f10a..927517e18ce3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -247,11 +247,8 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) total_bytes += tx_buf->bytecount; total_pkts += tx_buf->gso_segs; - if (ice_ring_is_xdp(tx_ring)) - page_frag_free(tx_buf->raw_buf); - else - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -307,9 +304,6 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes); - if (ice_ring_is_xdp(tx_ring)) - return !!budget; - netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes); @@ -1418,9 +1412,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { - bool wd = tx_ring->xsk_pool ? - ice_clean_tx_irq_zc(tx_ring, budget) : - ice_clean_tx_irq(tx_ring, budget); + bool wd; + + if (tx_ring->xsk_pool) + wd = ice_clean_tx_irq_zc(tx_ring, budget); + else if (ice_ring_is_xdp(tx_ring)) + wd = true; + else + wd = ice_clean_tx_irq(tx_ring, budget); if (!wd) clean_complete = false; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 732e0dd05655..22de016adbca 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,6 +13,7 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 +#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to @@ -310,12 +311,15 @@ struct ice_tx_ring { struct ice_vsi *vsi; /* Backreference to associated VSI */ /* CL2 - 2nd cacheline starts here */ dma_addr_t dma; /* physical address of ring */ + struct xsk_buff_pool *xsk_pool; u16 next_to_use; u16 next_to_clean; + u16 next_rs; + u16 next_dd; + u16 q_handle; /* Queue handle per TC */ + u16 reg_idx; /* HW register index of the ring */ u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ - struct xsk_buff_pool *xsk_pool; - /* stats structs */ struct ice_q_stats stats; struct u64_stats_sync syncp; @@ -326,8 +330,6 @@ struct ice_tx_ring { DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_ptp_tx *tx_tstamps; u32 txq_teid; /* Added Tx queue TEID */ - u16 q_handle; /* Queue handle per TC */ - u16 reg_idx; /* HW register index of the ring */ #define ICE_TX_FLAGS_RING_XDP BIT(0) u8 flags; u8 dcb_tc; /* Traffic class of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 5c2319e0c66d..d55db9cedc9b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -3,6 +3,7 @@ #include "ice_txrx_lib.h" #include "ice_eswitch.h" +#include "ice_lib.h" /** * ice_release_rx_desc - Store the new tail and head values @@ -213,6 +214,52 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) napi_gro_receive(&rx_ring->q_vector->napi, skb); } +/** + * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + */ +static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) +{ + unsigned int total_bytes = 0, total_pkts = 0; + u16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *next_dd_desc; + u16 next_dd = xdp_ring->next_dd; + struct ice_tx_buf *tx_buf; + int i; + + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + return; + + for (i = 0; i < ICE_TX_THRESH; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; + + total_bytes += tx_buf->bytecount; + /* normally tx_buf->gso_segs was taken but at this point + * it's always 1 for us + */ + total_pkts++; + + page_frag_free(tx_buf->raw_buf); + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + tx_buf->raw_buf = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + next_dd_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + if (xdp_ring->next_dd > xdp_ring->count) + xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_to_clean = ntc; + ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); +} + /** * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission * @data: packet data pointer @@ -226,6 +273,9 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) struct ice_tx_buf *tx_buf; dma_addr_t dma; + if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + ice_clean_xdp_irq(xdp_ring); + if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { xdp_ring->tx_stats.tx_busy++; return ICE_XDP_CONSUMED; @@ -246,21 +296,26 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, i); tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); - /* Make certain all of the status bits have been updated - * before next_to_watch is written. - */ - smp_wmb(); - i++; - if (i == xdp_ring->count) + if (i == xdp_ring->count) { i = 0; - - tx_buf->next_to_watch = tx_desc; + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = ICE_TX_THRESH - 1; + } xdp_ring->next_to_use = i; + if (i > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += ICE_TX_THRESH; + } + return ICE_XDP_TX; } From 22bf877e528f683bffc2fc932d148e840f7cc27d Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 14:00:03 +0200 Subject: [PATCH 234/440] ice: introduce XDP_TX fallback path Under rare circumstances there might be a situation where a requirement of having XDP Tx queue per CPU could not be fulfilled and some of the Tx resources have to be shared between CPUs. This yields a need for placing accesses to xdp_ring inside a critical section protected by spinlock. These accesses happen to be in the hot path, so let's introduce the static branch that will be triggered from the control plane when driver could not provide Tx queue dedicated for XDP on each CPU. Currently, the design that has been picked is to allow any number of XDP Tx queues that is at least half of a count of CPUs that platform has. For lower number driver will bail out with a response to user that there were not enough Tx resources that would allow configuring XDP. The sharing of rings is signalled via static branch enablement which in turn indicates that lock for xdp_ring accesses needs to be taken in hot path. Approach based on static branch has no impact on performance of a non-fallback path. One thing that is needed to be mentioned is a fact that the static branch will act as a global driver switch, meaning that if one PF got out of Tx resources, then other PFs that ice driver is servicing will suffer. However, given the fact that HW that ice driver is handling has 1024 Tx queues per each PF, this is currently an unlikely scenario. Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 3 ++ drivers/net/ethernet/intel/ice/ice_lib.c | 4 +- drivers/net/ethernet/intel/ice/ice_main.c | 53 ++++++++++++++++--- drivers/net/ethernet/intel/ice/ice_txrx.c | 16 +++++- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 7 ++- 6 files changed, 75 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 35cf1865beb4..aeac6cd0e74f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -167,6 +167,8 @@ enum ice_feature { ICE_F_MAX }; +DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key); + struct ice_txq_meta { u32 q_teid; /* Tx-scheduler element identifier */ u16 q_id; /* Entry in VSI's txq_map bitmap */ @@ -716,6 +718,7 @@ int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); +int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); int ice_destroy_xdp_rings(struct ice_vsi *vsi); int diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 58fad5f82076..e4dc4435c8f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3215,7 +3215,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_map_rings_to_vectors(vsi); if (ice_is_xdp_ena_vsi(vsi)) { - vsi->num_xdp_txq = num_possible_cpus(); + ret = ice_vsi_determine_xdp_res(vsi); + if (ret) + goto err_vectors; ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); if (ret) goto err_vectors; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 214eebb7bbd8..ccd9b9514001 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -44,6 +44,8 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ static DEFINE_IDA(ice_aux_ida); +DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key); +EXPORT_SYMBOL(ice_xdp_locking_key); static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; @@ -2397,14 +2399,19 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); } } - ice_for_each_rxq(vsi, i) - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + else + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; + } return 0; @@ -2469,6 +2476,10 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) if (__ice_vsi_get_qs(&xdp_qs_cfg)) goto err_map_xdp; + if (static_key_enabled(&ice_xdp_locking_key)) + netdev_warn(vsi->netdev, + "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n"); + if (ice_xdp_alloc_setup_rings(vsi)) goto clear_xdp_rings; @@ -2585,6 +2596,9 @@ free_qmap: devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings); vsi->xdp_rings = NULL; + if (static_key_enabled(&ice_xdp_locking_key)) + static_branch_dec(&ice_xdp_locking_key); + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) return 0; @@ -2619,6 +2633,29 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) } } +/** + * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have + * @vsi: VSI to determine the count of XDP Tx qs + * + * returns 0 if Tx qs count is higher than at least half of CPU count, + * -ENOMEM otherwise + */ +int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) +{ + u16 avail = ice_get_avail_txq_count(vsi->back); + u16 cpus = num_possible_cpus(); + + if (avail < cpus / 2) + return -ENOMEM; + + vsi->num_xdp_txq = min_t(u16, avail, cpus); + + if (vsi->num_xdp_txq < cpus) + static_branch_inc(&ice_xdp_locking_key); + + return 0; +} + /** * ice_xdp_setup_prog - Add or remove XDP eBPF program * @vsi: VSI to setup XDP for @@ -2648,10 +2685,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } if (!ice_is_xdp_ena_vsi(vsi) && prog) { - vsi->num_xdp_txq = num_possible_cpus(); - xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + xdp_ring_err = ice_vsi_determine_xdp_res(vsi); + if (xdp_ring_err) { + NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + } else { + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + } } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_ring_err = ice_destroy_xdp_rings(vsi); if (xdp_ring_err) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 927517e18ce3..01ae331927bd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -547,7 +547,11 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: return ICE_XDP_PASS; case XDP_TX: + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); if (err == ICE_XDP_CONSUMED) goto out_failure; return err; @@ -599,7 +603,14 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; - xdp_ring = vsi->xdp_rings[queue_index]; + if (static_branch_unlikely(&ice_xdp_locking_key)) { + queue_index %= vsi->num_xdp_txq; + xdp_ring = vsi->xdp_rings[queue_index]; + spin_lock(&xdp_ring->tx_lock); + } else { + xdp_ring = vsi->xdp_rings[queue_index]; + } + for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; @@ -613,6 +624,9 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (unlikely(flags & XDP_XMIT_FLUSH)) ice_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + return nxmit; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 22de016adbca..c759a02bfce4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -329,6 +329,7 @@ struct ice_tx_ring { struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_ptp_tx *tx_tstamps; + spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ #define ICE_TX_FLAGS_RING_XDP BIT(0) u8 flags; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index d55db9cedc9b..1dd7e84f41f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -350,6 +350,11 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res) if (xdp_res & ICE_XDP_REDIR) xdp_do_flush_map(); - if (xdp_res & ICE_XDP_TX) + if (xdp_res & ICE_XDP_TX) { + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); ice_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + } } From 2faf63b650bb2f6f4549227406631f6004b14101 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 19 Aug 2021 14:00:04 +0200 Subject: [PATCH 235/440] ice: make use of ice_for_each_* macros Go through the code base and use ice_for_each_* macros. While at it, introduce ice_for_each_xdp_txq() macro that can be used for looping over xdp_rings array. Commit is not introducing any new functionality. Signed-off-by: Maciej Fijalkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 5 ++++- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 ++++----- drivers/net/ethernet/intel/ice/ice_lib.c | 22 ++++++++++---------- drivers/net/ethernet/intel/ice/ice_main.c | 14 ++++++------- 6 files changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index aeac6cd0e74f..80f14886b5b1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -125,10 +125,13 @@ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) -/* Macros for each Tx/Rx ring in a VSI */ +/* Macros for each Tx/Xdp/Rx ring in a VSI */ #define ice_for_each_txq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_txq; (i)++) +#define ice_for_each_xdp_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_xdp_txq; (i)++) + #define ice_for_each_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 3071b8e79499..5daade32ea62 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -614,7 +614,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) return -EINVAL; base_idx = vsi->base_vector; - for (i = 0; i < vsi->num_q_vectors; i++) + ice_for_each_q_vector(vsi, i) if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, pf->msix_entries[base_idx + i].vector)) { ice_free_cpu_rx_rmap(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 4366626e0eb4..4284526e9e24 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -201,11 +201,11 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ - for (i = 0; i < vsi->num_txq; i++) { + ice_for_each_txq(vsi, i) { tx_ring = vsi->tx_rings[i]; tx_ring->dcb_tc = 0; } - for (i = 0; i < vsi->num_rxq; i++) { + ice_for_each_rxq(vsi, i) { rx_ring = vsi->rx_rings[i]; rx_ring->dcb_tc = 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 19ae045b7242..6e0af72c2020 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2756,12 +2756,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) /* set for the next time the netdev is started */ if (!netif_running(vsi->netdev)) { - for (i = 0; i < vsi->alloc_txq; i++) + ice_for_each_alloc_txq(vsi, i) vsi->tx_rings[i]->count = new_tx_cnt; - for (i = 0; i < vsi->alloc_rxq; i++) + ice_for_each_alloc_rxq(vsi, i) vsi->rx_rings[i]->count = new_rx_cnt; if (ice_is_xdp_ena_vsi(vsi)) - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) vsi->xdp_rings[i]->count = new_tx_cnt; vsi->num_tx_desc = (u16)new_tx_cnt; vsi->num_rx_desc = (u16)new_rx_cnt; @@ -2810,7 +2810,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) goto free_tx; } - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; @@ -2904,7 +2904,7 @@ process_link: } if (xdp_rings) { - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { ice_free_tx_ring(vsi->xdp_rings[i]); *vsi->xdp_rings[i] = xdp_rings[i]; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index e4dc4435c8f5..f981e77f72ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -46,12 +46,12 @@ static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena) int ret = 0; u16 i; - for (i = 0; i < vsi->num_rxq; i++) + ice_for_each_rxq(vsi, i) ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false); ice_flush(&vsi->back->hw); - for (i = 0; i < vsi->num_rxq; i++) { + ice_for_each_rxq(vsi, i) { ret = ice_vsi_wait_one_rx_ring(vsi, ena, i); if (ret) break; @@ -639,12 +639,12 @@ static void ice_vsi_put_qs(struct ice_vsi *vsi) mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_alloc_txq(vsi, i) { clear_bit(vsi->txq_map[i], pf->avail_txqs); vsi->txq_map[i] = ICE_INVAL_Q_INDEX; } - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_alloc_rxq(vsi, i) { clear_bit(vsi->rxq_map[i], pf->avail_rxqs); vsi->rxq_map[i] = ICE_INVAL_Q_INDEX; } @@ -1298,7 +1298,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) } if (vsi->tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_alloc_txq(vsi, i) { if (vsi->tx_rings[i]) { kfree_rcu(vsi->tx_rings[i], rcu); WRITE_ONCE(vsi->tx_rings[i], NULL); @@ -1306,7 +1306,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) } } if (vsi->rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_alloc_rxq(vsi, i) { if (vsi->rx_rings[i]) { kfree_rcu(vsi->rx_rings[i], rcu); WRITE_ONCE(vsi->rx_rings[i], NULL); @@ -1327,7 +1327,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_alloc_txq(vsi, i) { struct ice_tx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ @@ -1346,7 +1346,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) } /* Allocate Rx rings */ - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_alloc_rxq(vsi, i) { struct ice_rx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ @@ -1857,7 +1857,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); return ret; @@ -1955,7 +1955,7 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) u16 txq = 0, rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++) { + ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; u16 reg_idx = q_vector->reg_idx; @@ -2649,7 +2649,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) u32 rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++) { + ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; ice_write_intrl(q_vector, 0); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ccd9b9514001..f531691a3e12 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -103,7 +103,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) hw = &vsi->back->hw; - for (i = 0; i < vsi->num_txq; i++) { + ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; if (tx_ring && tx_ring->desc) { @@ -2377,7 +2377,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) struct ice_tx_desc *tx_desc; int i, j; - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = vsi->alloc_txq + i; struct ice_tx_ring *xdp_ring; @@ -2526,7 +2526,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) return 0; clear_xdp_rings: - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; @@ -2534,7 +2534,7 @@ clear_xdp_rings: err_map_xdp: mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; } @@ -2579,13 +2579,13 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) free_qmap: mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; } mutex_unlock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { if (vsi->xdp_rings[i]->desc) ice_free_tx_ring(vsi->xdp_rings[i]); @@ -7066,7 +7066,7 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_txq; i++) + ice_for_each_txq(vsi, i) if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) if (txqueue == vsi->tx_rings[i]->q_index) { tx_ring = vsi->tx_rings[i]; From 4b2c5fa9c9902ce34ecea6711558d9af96351b31 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 21 Jul 2021 16:14:12 +0300 Subject: [PATCH 236/440] net/mlx5: Add layout to support default timeouts register Add needed structures and defines for DTOR (default timeouts register). This will be used to get timeouts values from FW instead of hard coded values in the driver code thus enabling support for slower devices which need longer timeouts. Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 +++- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 37 ++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 66eaf0aa7f69..109cc8106d16 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -577,7 +577,9 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[880]; + __be32 rsvd2[878]; + __be32 cmd_exec_to; + __be32 cmd_q_init_to; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ca719c00824..ccbd87fbd3bf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -156,6 +156,7 @@ enum { MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, + MLX5_REG_DTOR = 0xC00E, }; enum mlx5_qpts_trust_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 993204a6c1a1..b8bff5109656 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1306,7 +1306,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; - u8 reserved_at_21[0x2]; + u8 reserved_at_21[0x1]; + u8 dtor[0x1]; u8 event_on_vhca_state_teardown_request[0x1]; u8 event_on_vhca_state_in_use[0x1]; u8 event_on_vhca_state_active[0x1]; @@ -2807,6 +2808,40 @@ struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_at_0[0xe0]; }; +struct mlx5_ifc_default_timeout_bits { + u8 to_multiplier[0x3]; + u8 reserved_at_3[0x9]; + u8 to_value[0x14]; +}; + +struct mlx5_ifc_dtor_reg_bits { + u8 reserved_at_0[0x20]; + + struct mlx5_ifc_default_timeout_bits pcie_toggle_to; + + u8 reserved_at_40[0x60]; + + struct mlx5_ifc_default_timeout_bits health_poll_to; + + struct mlx5_ifc_default_timeout_bits full_crdump_to; + + struct mlx5_ifc_default_timeout_bits fw_reset_to; + + struct mlx5_ifc_default_timeout_bits flush_on_err_to; + + struct mlx5_ifc_default_timeout_bits pci_sync_update_to; + + struct mlx5_ifc_default_timeout_bits tear_down_to; + + struct mlx5_ifc_default_timeout_bits fsm_reactivate_to; + + struct mlx5_ifc_default_timeout_bits reclaim_pages_to; + + struct mlx5_ifc_default_timeout_bits reclaim_vfs_pages_to; + + u8 reserved_at_1c0[0x40]; +}; + enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, From 5945e1adeab527ec96c75a786213c146d4d482a4 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Thu, 7 Oct 2021 18:00:27 +0300 Subject: [PATCH 237/440] net/mlx5: Read timeout values from init segment Replace hard coded timeouts with values stored in firmware's init segment. Timeouts are read from init segment during driver load. If init segment timeouts are not supported then fallback to hard coded defaults instead. Also move pre initialization timeouts which cannot be read from firmware to the new mechanism. Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 18 +++- .../ethernet/mellanox/mlx5/core/lib/tout.c | 96 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/lib/tout.h | 28 ++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 38 ++++---- include/linux/mlx5/driver.h | 5 +- 6 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 63032cd6efb1..a151575be51f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ - fw_reset.o qos.o + fw_reset.o qos.o lib/tout.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 4dc3a822907a..f71ec4d9d68e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,7 @@ #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/tout.h" enum { CMD_IF_REV = 5, @@ -225,9 +226,13 @@ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) static void poll_timeout(struct mlx5_cmd_work_ent *ent) { - unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + u64 cmd_to_ms = mlx5_tout_ms(dev, CMD); + unsigned long poll_end; u8 own; + poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000); + do { own = READ_ONCE(ent->lay->status_own); if (!(own & CMD_OWNER_HW)) { @@ -925,15 +930,18 @@ static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; - struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); - unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + bool poll_cmd = ent->polling; struct mlx5_cmd_layout *lay; + struct mlx5_core_dev *dev; + unsigned long cb_timeout; struct semaphore *sem; unsigned long flags; - bool poll_cmd = ent->polling; int alloc_ret; int cmd_mode; + dev = container_of(cmd, struct mlx5_core_dev, cmd); + cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + complete(&ent->handling); sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -1073,7 +1081,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { - unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); struct mlx5_cmd *cmd = &dev->cmd; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c new file mode 100644 index 000000000000..ee266e0d122a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include "lib/tout.h" + +struct mlx5_timeouts { + u64 to[MAX_TIMEOUT_TYPES]; +}; + +static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { + [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, + [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, + [MLX5_TO_FW_INIT_MS] = 2000, + [MLX5_TO_CMD_MS] = 60000 +}; + +static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) +{ + dev->timeouts->to[type] = val; +} + +static void tout_set_def_val(struct mlx5_core_dev *dev) +{ + int i; + + for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); +} + +int mlx5_tout_init(struct mlx5_core_dev *dev) +{ + dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); + if (!dev->timeouts) + return -ENOMEM; + + tout_set_def_val(dev); + return 0; +} + +void mlx5_tout_cleanup(struct mlx5_core_dev *dev) +{ + kfree(dev->timeouts); +} + +/* Time register consists of two fields to_multiplier(time out multiplier) + * and to_value(time out value). to_value is the quantity of the time units and + * to_multiplier is the type and should be one off these four values. + * 0x0: millisecond + * 0x1: seconds + * 0x2: minutes + * 0x3: hours + * this function converts the time stored in the two register fields into + * millisecond. + */ +static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val) +{ + u64 msec = to_val; + + to_mul &= 0x3; + /* convert hours/minutes/seconds to miliseconds */ + if (to_mul) + msec *= 1000 * int_pow(60, to_mul - 1); + + return msec; +} + +static u64 tout_convert_iseg_to_ms(u32 iseg_to) +{ + return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff); +} + +static bool tout_is_supported(struct mlx5_core_dev *dev) +{ + return !!ioread32be(&dev->iseg->cmd_q_init_to); +} + +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev) +{ + u32 to; + + if (!tout_is_supported(dev)) + return; + + to = ioread32be(&dev->iseg->cmd_q_init_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS); + + to = ioread32be(&dev->iseg->cmd_exec_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS); +} + +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) +{ + return dev->timeouts->to[type]; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h new file mode 100644 index 000000000000..7e6fc61c5b45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef MLX5_TIMEOUTS_H +#define MLX5_TIMEOUTS_H + +enum mlx5_timeouts_types { + /* pre init timeouts (not read from FW) */ + MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, + MLX5_TO_FW_PRE_INIT_WAIT_MS, + + /* init segment timeouts */ + MLX5_TO_FW_INIT_MS, + MLX5_TO_CMD_MS, + + MAX_TIMEOUT_TYPES +}; + +struct mlx5_core_dev; +int mlx5_tout_init(struct mlx5_core_dev *dev); +void mlx5_tout_cleanup(struct mlx5_core_dev *dev); +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); + +#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) + +# endif /* MLX5_TIMEOUTS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 65313448a47c..b4893eac6ed6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -60,6 +60,7 @@ #include "devlink.h" #include "fw_reset.h" #include "lib/mlx5.h" +#include "lib/tout.h" #include "fpga/core.h" #include "fpga/ipsec.h" #include "accel/ipsec.h" @@ -176,11 +177,6 @@ static struct mlx5_profile profile[] = { }, }; -#define FW_INIT_TIMEOUT_MILI 2000 -#define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 120000 -#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 - static int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; @@ -193,8 +189,6 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; - BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); - while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; @@ -205,7 +199,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, jiffies_to_msecs(end - warn) / 1000); warn = jiffies + msecs_to_jiffies(warn_time_mili); } - msleep(FW_INIT_WAIT_MS); + msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); } return err; @@ -975,25 +969,34 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + return err; + } + /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT), + mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); if (err) { - mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", - FW_PRE_INIT_TIMEOUT_MILI); - return err; + mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", + mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + goto err_tout_cleanup; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); - return err; + goto err_tout_cleanup; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); + mlx5_tout_query_iseg(dev); + + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0); if (err) { - mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", - FW_INIT_TIMEOUT_MILI); + mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n", + mlx5_tout_ms(dev, FW_INIT)); goto err_cmd_cleanup; } @@ -1062,6 +1065,8 @@ err_disable_hca: err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); +err_tout_cleanup: + mlx5_tout_cleanup(dev); return err; } @@ -1080,6 +1085,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); + mlx5_tout_cleanup(dev); return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ccbd87fbd3bf..fb06e8870aee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -66,10 +66,6 @@ enum { }; enum { - /* one minute for the sake of bringup. Generally, commands must always - * complete and we may need to increase this timeout value - */ - MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, MLX5_CMD_WQ_MAX_NAME = 32, }; @@ -755,6 +751,7 @@ struct mlx5_core_dev { u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; u8 embedded_cpu; } caps; + struct mlx5_timeouts *timeouts; u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; From 32def4120e4876b5367ad58eb3a641bf6915979b Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 13 Oct 2021 09:07:13 +0300 Subject: [PATCH 238/440] net/mlx5: Read timeout values from DTOR Replace hard coded timeouts with values stored by firmware in default timeouts register (DTOR). Timeouts are read during driver load. If DTOR is not supported by firmware then fallback to hard coded defaults instead. Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/health.h | 1 - .../mellanox/mlx5/core/en/reporter_rx.c | 7 +- .../mellanox/mlx5/core/en/reporter_tx.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 9 +-- .../ethernet/mellanox/mlx5/core/fw_reset.c | 16 ++--- .../net/ethernet/mellanox/mlx5/core/health.c | 21 +++--- .../ethernet/mellanox/mlx5/core/lib/tout.c | 68 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/lib/tout.h | 13 ++++ .../net/ethernet/mellanox/mlx5/core/main.c | 6 ++ .../ethernet/mellanox/mlx5/core/pagealloc.c | 16 ++--- 10 files changed, 124 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 018262d0164b..d5b7110a4265 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -32,7 +32,6 @@ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 -#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000 struct mlx5e_err_ctx { int (*recover)(void *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 0eb125316fe2..74086eb556ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -6,6 +6,7 @@ #include "txrx.h" #include "devlink.h" #include "ptp.h" +#include "lib/tout.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { @@ -32,8 +33,10 @@ out: static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) { - unsigned long exp_time = jiffies + - msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); + struct mlx5_core_dev *dev = icosq->channel->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); while (time_before(jiffies, exp_time)) { if (icosq->cc == icosq->pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index bb682fd751c9..4f4bc8726ec4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -4,11 +4,14 @@ #include "health.h" #include "en/ptp.h" #include "en/devlink.h" +#include "lib/tout.h" static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { - unsigned long exp_time = jiffies + - msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); + struct mlx5_core_dev *dev = sq->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); while (time_before(jiffies, exp_time)) { if (sq->cc == sq->pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 016d26f809a5..f4f8993eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,6 +35,7 @@ #include #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +#include "lib/tout.h" #include "accel/tls.h" enum { @@ -317,10 +318,9 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) return 0; } -#define MLX5_FAST_TEARDOWN_WAIT_MS 3000 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) { - unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS; + unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN); u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {}; u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; int state; @@ -618,17 +618,18 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) fwhandle, 0); } -#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */ static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) { - unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT); struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; u32 out[MLX5_ST_SZ_DW(mirc_reg)]; u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + unsigned long exp_time; int err; + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE)); + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 106b50e42b46..eaca79cc7b9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -3,6 +3,7 @@ #include "fw_reset.h" #include "diag/fw_tracer.h" +#include "lib/tout.h" enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, @@ -228,8 +229,6 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); } -#define MLX5_PCI_LINK_UP_TIMEOUT 2000 - static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -286,7 +285,7 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) goto restore; } - timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT); + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE)); do { err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, ®16); if (err) @@ -299,8 +298,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) if (reg16 & PCI_EXP_LNKSTA_DLLLA) { mlx5_core_info(dev, "PCI Link up\n"); } else { - mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n", - reg16, MLX5_PCI_LINK_UP_TIMEOUT); + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); err = -ETIMEDOUT; } @@ -395,16 +394,15 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti return NOTIFY_OK; } -#define MLX5_FW_RESET_TIMEOUT_MSEC 5000 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) { - unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC); + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)); struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; int err; if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { - mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n", - MLX5_FW_RESET_TIMEOUT_MSEC / 1000); + mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n", + mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000); err = -ETIMEDOUT; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 037e18dd4be0..6a4dd7f78958 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,10 +40,10 @@ #include "lib/eq.h" #include "lib/mlx5.h" #include "lib/pci_vsc.h" +#include "lib/tout.h" #include "diag/fw_tracer.h" enum { - MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, MAX_MISSES = 3, }; @@ -219,11 +219,9 @@ unlock: mutex_unlock(&dev->intf_state_mutex); } -#define MLX5_CRDUMP_WAIT_MS 60000 -#define MLX5_FW_RESET_WAIT_MS 1000 void mlx5_error_sw_reset(struct mlx5_core_dev *dev) { - unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE); int lock = -EBUSY; mutex_lock(&dev->intf_state_mutex); @@ -237,7 +235,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) lock = lock_sem_sw_reset(dev, true); if (lock == -EBUSY) { - delay_ms = MLX5_CRDUMP_WAIT_MS; + delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP); goto recover_from_sw_reset; } /* Execute SW reset */ @@ -307,13 +305,11 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -/* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_WAIT_MSECS 60000 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) { unsigned long end; - end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET)); while (sensor_pci_not_working(dev)) { if (time_after(jiffies, end)) return -ETIMEDOUT; @@ -674,13 +670,13 @@ static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) devlink_health_reporter_destroy(health->fw_fatal_reporter); } -static unsigned long get_next_poll_jiffies(void) +static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) { unsigned long next; get_random_bytes(&next, sizeof(next)); next %= HZ; - next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL)); return next; } @@ -740,11 +736,12 @@ static void poll_health(struct timer_list *t) queue_work(health->wq, &health->report_work); out: - mod_timer(&health->timer, get_next_poll_jiffies()); + mod_timer(&health->timer, get_next_poll_jiffies(dev)); } void mlx5_start_health_poll(struct mlx5_core_dev *dev) { + u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL); struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); @@ -753,7 +750,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; - health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); + health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index ee266e0d122a..0dd96a6b140d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -13,7 +13,17 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, [MLX5_TO_FW_INIT_MS] = 2000, - [MLX5_TO_CMD_MS] = 60000 + [MLX5_TO_CMD_MS] = 60000, + [MLX5_TO_PCI_TOGGLE_MS] = 2000, + [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000, + [MLX5_TO_FULL_CRDUMP_MS] = 60000, + [MLX5_TO_FW_RESET_MS] = 60000, + [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000, + [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000, + [MLX5_TO_TEARDOWN_MS] = 3000, + [MLX5_TO_FSM_REACTIVATE_MS] = 5000, + [MLX5_TO_RECLAIM_PAGES_MS] = 5000, + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000 }; static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) @@ -94,3 +104,59 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) { return dev->timeouts->to[type]; } + +#define MLX5_TIMEOUT_QUERY(fld, reg_out) \ + ({ \ + struct mlx5_ifc_default_timeout_bits *time_field; \ + u32 to_multi, to_value; \ + u64 to_val_ms; \ + \ + time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \ + to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \ + to_value = MLX5_GET(default_timeout, time_field, to_value); \ + to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \ + to_val_ms; \ + }) + +#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ + ({ \ + u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ + tout_set(dev, fw_to + (to_extra), to_type); \ + fw_to; \ + }) + +static int tout_query_dtor(struct mlx5_core_dev *dev) +{ + u64 pcie_toggle_to_val, tear_down_to_val; + u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0); + if (err) + return err; + + pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0); + MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val); + + tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0); + MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS, + tear_down_to_val); + + MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0); + MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0); + MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0); + MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + + return 0; +} + +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev) +{ + if (tout_is_supported(dev)) + return tout_query_dtor(dev); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 7e6fc61c5b45..31faa5c17aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -14,6 +14,18 @@ enum mlx5_timeouts_types { MLX5_TO_FW_INIT_MS, MLX5_TO_CMD_MS, + /* DTOR timeouts */ + MLX5_TO_PCI_TOGGLE_MS, + MLX5_TO_HEALTH_POLL_INTERVAL_MS, + MLX5_TO_FULL_CRDUMP_MS, + MLX5_TO_FW_RESET_MS, + MLX5_TO_FLUSH_ON_ERROR_MS, + MLX5_TO_PCI_SYNC_UPDATE_MS, + MLX5_TO_TEARDOWN_MS, + MLX5_TO_FSM_REACTIVATE_MS, + MLX5_TO_RECLAIM_PAGES_MS, + MLX5_TO_RECLAIM_VFS_PAGES_MS, + MAX_TIMEOUT_TYPES }; @@ -21,6 +33,7 @@ struct mlx5_core_dev; int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b4893eac6ed6..75d284272119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1020,6 +1020,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) goto err_disable_hca; } + err = mlx5_tout_query_dtor(dev); + if (err) { + mlx5_core_err(dev, "failed to read dtor\n"); + goto reclaim_boot_pages; + } + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 110c0837f95b..f6b5451328fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -38,6 +38,7 @@ #include #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/tout.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -64,11 +65,6 @@ struct fw_page { unsigned int free_count; }; -enum { - MAX_RECLAIM_TIME_MSECS = 5000, - MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, -}; - enum { MLX5_MAX_RECLAIM_TIME_MILI = 5000, MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, @@ -641,7 +637,8 @@ static int optimal_reclaimed_pages(void) static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, struct rb_root *root, u16 func_id) { - unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); + unsigned long end = jiffies + recl_pages_to_jiffies; while (!RB_EMPTY_ROOT(root)) { int nclaimed; @@ -656,7 +653,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, } if (nclaimed) - end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + end = jiffies + recl_pages_to_jiffies; if (time_after(jiffies, end)) { mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); @@ -727,7 +724,8 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { - unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES)); + unsigned long end = jiffies + recl_vf_pages_to_jiffies; int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ @@ -743,7 +741,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) return -ETIMEDOUT; } if (*pages < prev_pages) { - end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + end = jiffies + recl_vf_pages_to_jiffies; prev_pages = *pages; } msleep(50); From 17ac528d886841a0b183f4d4a0205176eccfd158 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 Oct 2021 14:49:15 +0300 Subject: [PATCH 239/440] net/mlx5: Bridge, provide flow source hints Currently, SMFS mode doesn't support rx-loopback flows which causes bridge egress rules to be rejected because without hint rules for both rx and tx destinations are created by default. Provide explicit flow source hints for compatibility with SMFS. Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index ed72246d1d83..588622ba38c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -677,6 +677,10 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const u if (!rule_spec) return ERR_PTR(-ENOMEM); + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, From 9fbe1c25ecca87f1c390d1bfd347df92749941c5 Mon Sep 17 00:00:00 2001 From: Moosa Baransi Date: Sun, 26 Sep 2021 17:59:52 +0300 Subject: [PATCH 240/440] net/mlx5i: Enable Rx steering for IPoIB via ethtool Enable steering IPoIB packets via ethtool, the same way it is done today for Ethernet packets. Signed-off-by: Moosa Baransi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 8 ++--- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 6 ++-- .../mellanox/mlx5/core/en_fs_ethtool.c | 6 ++-- .../mellanox/mlx5/core/ipoib/ethtool.c | 30 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++ 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 41684a6c44e9..1c23453a041d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -125,15 +125,15 @@ struct mlx5e_ethtool_steering { void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); -int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); -int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs); #else static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { } static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { } -static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { return -EOPNOTSUPP; } -static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs) { return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_EN_RXNFC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7a97e0e21fd7..25926e581d18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2137,12 +2137,14 @@ int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return 0; } - return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); } int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { - return mlx5e_ethtool_set_rxnfc(dev, cmd); + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxnfc(priv, cmd); } static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 03693fa74a70..81ebf281cdb4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -937,9 +937,8 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, return 0; } -int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { - struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (cmd->cmd) { @@ -960,10 +959,9 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } -int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs) { - struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (info->cmd) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 0c8594c7df21..ee0eb4a4b819 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -33,6 +33,11 @@ #include "en.h" #include "ipoib.h" +static u32 mlx5i_flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + static void mlx5i_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -217,6 +222,27 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, return 0; } +#ifdef CONFIG_MLX5_EN_RXNFC +static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct ethtool_rx_flow_spec *fs = &cmd->fs; + + if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW) + return -EINVAL; + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} +#endif + const struct ethtool_ops mlx5i_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -233,6 +259,10 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_coalesce = mlx5i_get_coalesce, .set_coalesce = mlx5i_set_coalesce, .get_ts_info = mlx5i_get_ts_info, +#ifdef CONFIG_MLX5_EN_RXNFC + .get_rxnfc = mlx5i_get_rxnfc, + .set_rxnfc = mlx5i_set_rxnfc, +#endif .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index f7ebc1f9283f..3b8d8ada1a01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -336,6 +336,8 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_arfs_tables; } + mlx5e_ethtool_init_steering(priv); + return 0; err_destroy_arfs_tables: @@ -348,6 +350,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) From fbfa97b4d79f26042f188b84959065213e9d3e99 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 18 Aug 2021 13:21:30 +0300 Subject: [PATCH 241/440] net/mlx5: Disable roce at HCA level Currently, when a user disables roce via the devlink param, this change isn't passed down to the device. If device allows disabling RoCE at device level, make use of it. This instructs the device to skip memory allocations related to RoCE functionality which otherwise is done by the device. Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 3 ++- .../net/ethernet/mellanox/mlx5/core/main.c | 25 ++++++++++++++++++- include/linux/mlx5/driver.h | 9 ++++--- include/linux/mlx5/mlx5_ifc.h | 3 ++- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a85341a41cd0..1c98652b244a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -454,7 +454,8 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, struct mlx5_core_dev *dev = devlink_priv(devlink); bool new_state = val.vbool; - if (new_state && !MLX5_CAP_GEN(dev, roce)) { + if (new_state && !MLX5_CAP_GEN(dev, roce) && + !MLX5_CAP_GEN(dev, roce_rw_supported)) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 75d284272119..47d92fb459ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -558,15 +558,38 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + if (MLX5_CAP_GEN(dev, roce_rw_supported)) + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } +/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the + * boot process. + * In case RoCE cap is writable in FW and user/devlink requested to change the + * cap, we are yet to query the final state of the above cap. + * Hence, the need for this function. + * + * Returns + * True: + * 1) RoCE cap is read only in FW and already disabled + * OR: + * 2) RoCE cap is writable in FW and user/devlink requested it off. + * + * In any other case, return False. + */ +static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); +} + static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) { void *set_hca_cap; int err; - if (!MLX5_CAP_GEN(dev, roce)) + if (is_roce_fw_disabled(dev)) return 0; err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fb06e8870aee..7c8b5f06c2cd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1251,11 +1251,12 @@ static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; + int err; - devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return val.vbool; + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return err ? MLX5_CAP_GEN(dev, roce) : val.vbool; } #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b8bff5109656..c614ad1da44d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1588,7 +1588,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_tis_per_sq[0x5]; u8 ext_stride_num_range[0x1]; - u8 reserved_at_3a1[0x2]; + u8 roce_rw_supported[0x1]; + u8 reserved_at_3a2[0x1]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; From 88594d83314ad06314c9743b1ec49d0a95a5d4c7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Sep 2021 14:23:32 +0300 Subject: [PATCH 242/440] net/mlx5: CT: Fix missing cleanup of ct nat table on init failure If CT fails to initialize it's rhashtables, it doesn't destroy the ct nat global table. Destroy the ct nat global table on ct init failure. Fixes: d7cade513752 ("net/mlx5e: check return value of rhashtable_init") Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 225748a9e52a..740cd6f088b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2141,6 +2141,7 @@ err_ct_tuples_nat_ht: err_ct_tuples_ht: rhashtable_destroy(&ct_priv->zone_ht); err_ct_zone_ht: + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: From 0885ae1a9d34d946e12c1cf9834463ee3541a63a Mon Sep 17 00:00:00 2001 From: Abhiram R N Date: Wed, 22 Sep 2021 12:00:07 +0530 Subject: [PATCH 243/440] net/mlx5e: Add extack msgs related to TC for better debug As multiple places EOPNOTSUPP and EINVAL is returned from driver it becomes difficult to understand the reason only with error code. With the netlink extack message exact reason will be known and will aid in debugging. Signed-off-by: Abhiram R N Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 106 +++++++++++++----- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d92ee2f37c22..420b3ec0eb04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1891,8 +1891,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, bool needs_mapping, sets_mapping; int err; - if (!mlx5e_is_eswitch_flow(flow)) + if (!mlx5e_is_eswitch_flow(flow)) { + NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported"); return -EOPNOTSUPP; + } needs_mapping = !!flow->attr->chain; sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); @@ -2264,8 +2266,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, addr_type = match.key->addr_type; /* the HW doesn't support frag first/later */ - if (match.mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { + NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); return -EOPNOTSUPP; + } if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); @@ -2432,8 +2436,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (ip_proto) { case IPPROTO_ICMP: if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_ICMP)) + MLX5_FLEX_PROTO_ICMP)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMP is not supported"); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, match.mask->type); MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, @@ -2445,8 +2452,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, break; case IPPROTO_ICMPV6: if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_ICMPV6)) + MLX5_FLEX_PROTO_ICMPV6)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMPV6 is not supported"); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, match.mask->type); MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, @@ -2552,15 +2562,19 @@ static int pedit_header_offsets[] = { #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers_action *hdrs) + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { u32 *curr_pmask, *curr_pval; curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); - if (*curr_pmask & mask) /* disallow acting twice on the same location */ + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. Acting twice on same location"); goto out_err; + } *curr_pmask |= mask; *curr_pval |= (val & mask); @@ -2893,7 +2907,7 @@ parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, val = act->mangle.val; offset = act->mangle.offset; - err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); if (err) goto out_err; @@ -2912,8 +2926,10 @@ parse_pedit_to_reformat(const struct flow_action_entry *act, u32 mask, val, offset; u32 *p; - if (act->id != FLOW_ACTION_MANGLE) + if (act->id != FLOW_ACTION_MANGLE) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); return -EOPNOTSUPP; + } if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); @@ -3429,12 +3445,16 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, u32 action = 0; int err, i; - if (!flow_action_has_entries(flow_action)) + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); return -EINVAL; + } if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } nic_attr = attr->nic_attr; nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -3524,7 +3544,8 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, flow_flag_set(flow, CT); break; default: - NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "The offload action is not supported in NIC action"); return -EOPNOTSUPP; } } @@ -3562,19 +3583,25 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { u8 vlan_idx = attr->total_vlan; - if (vlan_idx >= MLX5_FS_VLAN_DEPTH) + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); return -EOPNOTSUPP; + } switch (act->id) { case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan pop action is not supported"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; } else { @@ -3590,20 +3617,27 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported for vlan depth > 1"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && (act->vlan.proto != htons(ETH_P_8021Q) || - act->vlan.prio)) + act->vlan.prio)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } break; default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); return -EINVAL; } @@ -3637,7 +3671,8 @@ static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, static int add_vlan_push_action(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, struct net_device **out_dev, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { struct net_device *vlan_dev = *out_dev; struct flow_action_entry vlan_act = { @@ -3648,7 +3683,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); if (err) return err; @@ -3659,14 +3694,15 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, return -ENODEV; if (is_vlan_dev(*out_dev)) - err = add_vlan_push_action(priv, attr, out_dev, action); + err = add_vlan_push_action(priv, attr, out_dev, action, extack); return err; } static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, @@ -3676,7 +3712,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, nest_level = attr->parse_attr->filter_dev->lower_level - priv->netdev->lower_level; while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); if (err) return err; } @@ -3798,12 +3834,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int err, i, if_count = 0; bool mpls_push = false; - if (!flow_action_has_entries(flow_action)) + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); return -EINVAL; + } if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; @@ -3952,14 +3992,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, &out_dev, - &action); + &action, extack); if (err) return err; } if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, - &action); + &action, extack); if (err) return err; } @@ -4008,10 +4048,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_TUNNEL_ENCAP: info = act->tunnel; - if (info) + if (info) { encap = true; - else + } else { + NL_SET_ERR_MSG_MOD(extack, + "Zero tunnel attributes is not supported"); return -EOPNOTSUPP; + } break; case FLOW_ACTION_VLAN_PUSH: @@ -4025,7 +4068,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, act, parse_attr, hdrs, &action, extack); } else { - err = parse_tc_vlan_action(priv, act, esw_attr, &action); + err = parse_tc_vlan_action(priv, act, esw_attr, &action, extack); } if (err) return err; @@ -4079,7 +4122,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, flow_flag_set(flow, SAMPLE); break; default: - NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "The offload action is not supported in FDB action"); return -EOPNOTSUPP; } } @@ -4753,8 +4797,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } flow_action_for_each(i, act, flow_action) { switch (act->id) { From 0e6f3ef469bbf69ea6840aa4d15dcfc8ce978760 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 5 Sep 2021 09:49:36 +0200 Subject: [PATCH 244/440] net/mlx5: DR, Prefer kcalloc over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, refactor the code a bit to use the purpose specific kcalloc() function instead of the argument size * count in the kzalloc() function. [1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 50630112c8ff..07936841ce99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -854,6 +854,7 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action *action; bool reformat_req = false; u32 num_of_ref = 0; + u32 ref_act_cnt; int ret; int i; @@ -862,11 +863,14 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, return NULL; } - hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL); + hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL); if (!hw_dests) return NULL; - ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL); + if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt))) + goto free_hw_dests; + + ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL); if (!ref_actions) goto free_hw_dests; From 7b1b6d35f045d677f34f8085ac02827fe4080d7e Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Fri, 8 Oct 2021 09:02:39 +0300 Subject: [PATCH 245/440] net/mlx5: Check return status first when querying system_image_guid When querying system_image_guid from firmware, we should check return value first. The buffer content is valid only if query succeed. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/vport.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 4c1440a95ad7..8846d30a380a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -421,19 +421,21 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.system_image_guid); - +out: kvfree(out); - - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); @@ -1133,19 +1135,20 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { int port_type_cap = MLX5_CAP_GEN(mdev, port_type); - u64 tmp = 0; + u64 tmp; + int err; if (mdev->sys_image_guid) return mdev->sys_image_guid; if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) - mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); else - mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); - mdev->sys_image_guid = tmp; + mdev->sys_image_guid = err ? 0 : tmp; - return tmp; + return mdev->sys_image_guid; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); From 2ec16ddde1fa31a83aee04320b248e94348d9152 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Thu, 16 Sep 2021 10:46:17 +0300 Subject: [PATCH 246/440] net/mlx5: Introduce new device index wrapper Downstream patches. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2 +- include/linux/mlx5/driver.h | 5 +++++ 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 86e079310ac3..ae52e7f38306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -24,7 +24,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) if (mlx5_core_is_pf(priv->mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn); + attrs.phys.port_number = mlx5_get_dev_index(priv->mdev); if (MLX5_ESWITCH_MANAGER(priv->mdev)) { mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 20af557ae30c..7f9b96d9537e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -36,7 +36,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 return NULL; mlx5_esw_get_port_parent_id(dev, &ppid); - pfnum = PCI_FUNC(dev->pdev->devfn); + pfnum = mlx5_get_dev_index(dev); external = mlx5_core_is_ecpf_esw_manager(dev); if (external) controller_num = dev->priv.eswitch->offloads.host_number + 1; @@ -149,7 +149,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (IS_ERR(vport)) return PTR_ERR(vport); - pfnum = PCI_FUNC(dev->pdev->devfn); + pfnum = mlx5_get_dev_index(dev); mlx5_esw_get_port_parent_id(dev, &ppid); memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ca7e31a1a431..3e5a7d74020b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2798,7 +2798,7 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) int id; /* Only 4 bits of pf_num */ - pf_num = PCI_FUNC(esw->dev->pdev->devfn); + pf_num = mlx5_get_dev_index(esw->dev); if (pf_num > max_pf_num) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index ca5690b0a7ab..f35c8ba48aac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -688,7 +688,7 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev, struct net_device *netdev) { - unsigned int fn = PCI_FUNC(dev->pdev->devfn); + unsigned int fn = mlx5_get_dev_index(dev); if (fn >= MLX5_MAX_PORTS) return; @@ -718,7 +718,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - unsigned int fn = PCI_FUNC(dev->pdev->devfn); + unsigned int fn = mlx5_get_dev_index(dev); if (fn >= MLX5_MAX_PORTS) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 13891fdc607e..e1bb3acf45e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -323,7 +323,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); return -EOPNOTSUPP; } - if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) { + if (new_attr->pfnum != mlx5_get_dev_index(dev)) { NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); return -EOPNOTSUPP; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7c8b5f06c2cd..aecc38b90de5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1243,6 +1243,11 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) return MLX5_CAP_GEN(dev, native_port_num); } +static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev) +{ + return PCI_FUNC(dev->pdev->devfn); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; From 1021d0645d593ea86193c5fc371e33e5b208e14d Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Tue, 12 Oct 2021 10:40:52 +0300 Subject: [PATCH 247/440] net/mlx5: Use native_port_num as 1st option of device index Using "native_port_num" can support more NICs. Fallback to PCIe IDs if "native_port_num" query fails. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aecc38b90de5..cf508685abca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1245,7 +1245,12 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev) { - return PCI_FUNC(dev->pdev->devfn); + int idx = MLX5_CAP_GEN(dev, native_port_num); + + if (idx >= 1 && idx <= MLX5_MAX_PORTS) + return idx - 1; + else + return PCI_FUNC(dev->pdev->devfn); } enum { From 8a543184d79c83d0887c25cf202a43559ba39583 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Tue, 12 Oct 2021 10:53:00 +0300 Subject: [PATCH 248/440] net/mlx5: Use system_image_guid to determine bonding With specific NICs, the PFs may have different PCIe ids like 0001:01:00.0/1 and 0002:02:00:00/1. For PFs with the same system_image_guid, driver should consider them under the same physical NIC and they are legal to bond together. If firmware doesn't support system_image_guid, set it to zero and fallback to use PCIe ids. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index e8093c4e09d4..a8b84d53dfb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "mlx5_core.h" /* intf dev list mutex */ @@ -537,6 +538,16 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) return add_drivers(dev); } +static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +{ + u64 fsystem_guid, psystem_guid; + + fsystem_guid = mlx5_query_nic_system_image_guid(dev); + psystem_guid = mlx5_query_nic_system_image_guid(peer_dev); + + return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); +} + static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) { return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | @@ -556,7 +567,8 @@ static int next_phys_dev(struct device *dev, const void *data) if (mdev == curr) return 0; - if (mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) + if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) && + mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) return 0; return 1; From 29211e7db28ab12a4a5aaab4bcc080a3ac67ec78 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Fri, 15 Oct 2021 16:05:50 -0700 Subject: [PATCH 249/440] mptcp: Avoid NULL dereference in mptcp_getsockopt_subflow_addrs() Coverity complains of a possible NULL dereference in mptcp_getsockopt_subflow_addrs(): 861 } else if (sk->sk_family == AF_INET6) { 3. returned_null: inet6_sk returns NULL. [show details] 4. var_assigned: Assigning: np = NULL return value from inet6_sk. 862 const struct ipv6_pinfo *np = inet6_sk(sk); Fix this by checking for NULL. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/231 Fixes: c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support") Cc: Florian Westphal Signed-off-by: Tim Gardner [mjm: Added WARN_ON_ONCE() to the unexpected case] Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/sockopt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8137cc3a4296..0f1e661c2032 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -861,6 +861,9 @@ static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addr } else if (sk->sk_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); + if (WARN_ON_ONCE(!np)) + return; + a->sin6_local.sin6_family = AF_INET6; a->sin6_local.sin6_port = inet->inet_sport; From 72bcbc46a5c385961b9c167db79ad1f38f04e6a7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 Oct 2021 16:05:51 -0700 Subject: [PATCH 250/440] mptcp: increase default max additional subflows to 2 The current default does not allowing additional subflows, mostly as a safety restriction to avoid uncontrolled resource consumption on busy servers. Still the system admin and/or the application have to opt-in to MPTCP explicitly. After that, they need to change (increase) the default maximum number of additional subflows. Let set that to reasonable default, and make end-users life easier. Additionally we need to update some self-tests accordingly. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 3 +++ tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 ++++- tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 +++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 050eea231528..f7d33a9abd57 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2052,6 +2052,9 @@ static int __net_init pm_nl_init_net(struct net *net) struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); INIT_LIST_HEAD_RCU(&pernet->local_addr_list); + + /* Cit. 2 subflows ought to be enough for anybody. */ + pernet->subflows_max = 2; pernet->next_id = 1; pernet->stale_loss_cnt = 4; spin_lock_init(&pernet->lock); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 255793c5ac4f..293d349e21fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -945,12 +945,15 @@ subflows_tests() # subflow limited by client reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 0 + ip netns exec $ns2 ./pm_nl_ctl limits 0 0 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by client" 0 0 0 # subflow limited by server reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 0 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 @@ -973,7 +976,7 @@ subflows_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows" 2 2 2 - # multiple subflows limited by serverf + # multiple subflows limited by server reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 3c741abe034e..cbacf9f6538b 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -70,7 +70,7 @@ check() check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "defaults limits" +subflows 2" "defaults limits" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo @@ -118,11 +118,11 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" ip netns exec $ns1 ./pm_nl_ctl limits 9 1 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "rcv addrs above hard limit" +subflows 2" "rcv addrs above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 1 9 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "subflows above hard limit" +subflows 2" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 From 3828c514726fce7d97063155c4749eafefd9fbd2 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 15 Oct 2021 16:05:52 -0700 Subject: [PATCH 251/440] mptcp: Make mptcp_pm_nl_mp_prio_send_ack() static This function is only used within pm_netlink.c now. Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") Reviewed-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/protocol.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f7d33a9abd57..7b96be1e9f14 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -654,9 +654,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) } } -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - u8 bkup) +static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup) { struct mptcp_subflow_context *subflow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7379ab580a7e..284fdcec067e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -736,9 +736,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); From 1c5b5b3f0eab5eb8d954816dbb3124d2411fd800 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Fri, 15 Oct 2021 23:41:34 -0600 Subject: [PATCH 252/440] net: macvtap: fix template string argument of device_create() call The last argument of device_create() call should be a template string. The tap_name variable should be the argument to the string, but not the argument of the call itself. We should add the template string and turn tap_name into its argument. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 694e2f5dbbe5..6b12902a803f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -169,7 +169,7 @@ static int macvtap_device_event(struct notifier_block *unused, devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); classdev = device_create(&macvtap_class, &dev->dev, devt, - dev, tap_name); + dev, "%s", tap_name); if (IS_ERR(classdev)) { tap_free_minor(macvtap_major, &vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); From a07a296bba9ded457b853a3f11f3a23b1e5978d1 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Fri, 15 Oct 2021 23:41:35 -0600 Subject: [PATCH 253/440] net: ipvtap: fix template string argument of device_create() call The last argument of device_create() call should be a template string. The tap_name variable should be the argument to the string, but not the argument of the call itself. We should add the template string and turn tap_name into its argument. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 1cedb634f4f7..ef02f2cf5ce1 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -162,7 +162,7 @@ static int ipvtap_device_event(struct notifier_block *unused, devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor); classdev = device_create(&ipvtap_class, &dev->dev, devt, - dev, tap_name); + dev, "%s", tap_name); if (IS_ERR(classdev)) { tap_free_minor(ipvtap_major, &vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); From 8ec53ed9af1f7baa8f359bfd0286c59dbc7bdb41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:41 -0700 Subject: [PATCH 254/440] ethernet: adaptec: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/adaptec/starfire.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 16b6b83f670b..c6982f7caf9b 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -641,6 +641,7 @@ static int starfire_init_one(struct pci_dev *pdev, struct netdev_private *np; int i, irq, chip_idx = ent->driver_data; struct net_device *dev; + u8 addr[ETH_ALEN]; long ioaddr; void __iomem *base; int drv_flags, io_size; @@ -696,7 +697,8 @@ static int starfire_init_one(struct pci_dev *pdev, /* Serial EEPROM reads are hidden by the hardware. */ for (i = 0; i < 6; i++) - dev->dev_addr[i] = readb(base + EEPROMCtrl + 20 - i); + addr[i] = readb(base + EEPROMCtrl + 20 - i); + eth_hw_addr_set(dev, addr); #if ! defined(final_version) /* Dump the EEPROM contents during development. */ if (debug > 4) From 0d4c7517159fad5f507df7559dee1e4738c67d58 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:42 -0700 Subject: [PATCH 255/440] ethernet: aeroflex: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. macaddr[] is a module param, and int, so copy the address into an array of u8 on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/aeroflex/greth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index cc34eaf0491f..447dc64a17e5 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1346,6 +1346,7 @@ static int greth_of_probe(struct platform_device *ofdev) int i; int err; int tmp; + u8 addr[ETH_ALEN]; unsigned long timeout; dev = alloc_etherdev(sizeof(struct greth_private)); @@ -1449,8 +1450,6 @@ static int greth_of_probe(struct platform_device *ofdev) break; } if (i == 6) { - u8 addr[ETH_ALEN]; - err = of_get_mac_address(ofdev->dev.of_node, addr); if (!err) { for (i = 0; i < 6; i++) @@ -1464,7 +1463,8 @@ static int greth_of_probe(struct platform_device *ofdev) } for (i = 0; i < 6; i++) - dev->dev_addr[i] = macaddr[i]; + addr[i] = macaddr[i]; + eth_hw_addr_set(dev, addr); macaddr[5]++; From ffaeca68fb5fd5cbf935bf297f78a523506246ab Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:43 -0700 Subject: [PATCH 256/440] ethernet: alteon: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Break the address apart into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/alteon/acenic.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index eeb86bd851f9..732da15a3827 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -869,6 +869,7 @@ static int ace_init(struct net_device *dev) int board_idx, ecode = 0; short i; unsigned char cache_size; + u8 addr[ETH_ALEN]; ap = netdev_priv(dev); regs = ap->regs; @@ -988,12 +989,13 @@ static int ace_init(struct net_device *dev) writel(mac1, ®s->MacAddrHi); writel(mac2, ®s->MacAddrLo); - dev->dev_addr[0] = (mac1 >> 8) & 0xff; - dev->dev_addr[1] = mac1 & 0xff; - dev->dev_addr[2] = (mac2 >> 24) & 0xff; - dev->dev_addr[3] = (mac2 >> 16) & 0xff; - dev->dev_addr[4] = (mac2 >> 8) & 0xff; - dev->dev_addr[5] = mac2 & 0xff; + addr[0] = (mac1 >> 8) & 0xff; + addr[1] = mac1 & 0xff; + addr[2] = (mac2 >> 24) & 0xff; + addr[3] = (mac2 >> 16) & 0xff; + addr[4] = (mac2 >> 8) & 0xff; + addr[5] = mac2 & 0xff; + eth_hw_addr_set(dev, addr); printk("MAC: %pM\n", dev->dev_addr); From f98c50509a20114d443551926357b1cb08424ec5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:44 -0700 Subject: [PATCH 257/440] ethernet: amd: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/amd8111e.c | 4 +++- drivers/net/ethernet/amd/pcnet32.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 0b493467b042..9421afb950f7 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1743,6 +1743,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, unsigned long reg_addr, reg_len; struct amd8111e_priv *lp; struct net_device *dev; + u8 addr[ETH_ALEN]; err = pci_enable_device(pdev); if (err) { @@ -1809,7 +1810,8 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initializing MAC address */ for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = readb(lp->mmio + PADR + i); + addr[i] = readb(lp->mmio + PADR + i); + eth_hw_addr_set(dev, addr); /* Setting user defined parametrs */ lp->ext_phy_option = speed_duplex[card_idx]; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 820baa2604ac..f5c50ff377ff 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1595,6 +1595,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) struct net_device *dev; const struct pcnet32_access *a = NULL; u8 promaddr[ETH_ALEN]; + u8 addr[ETH_ALEN]; int ret = -ENODEV; /* reset the chip */ @@ -1760,9 +1761,10 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) unsigned int val; val = a->read_csr(ioaddr, i + 12) & 0x0ffff; /* There may be endianness issues here. */ - dev->dev_addr[2 * i] = val & 0x0ff; - dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff; + addr[2 * i] = val & 0x0ff; + addr[2 * i + 1] = (val >> 8) & 0x0ff; } + eth_hw_addr_set(dev, addr); /* read PROM address and compare with CSR address */ for (i = 0; i < ETH_ALEN; i++) @@ -1780,8 +1782,11 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ - if (!is_valid_ether_addr(dev->dev_addr)) - eth_zero_addr(dev->dev_addr); + if (!is_valid_ether_addr(dev->dev_addr)) { + static const u8 zero_addr[ETH_ALEN] = {}; + + eth_hw_addr_set(dev, zero_addr); + } if (pcnet32_debug & NETIF_MSG_PROBE) { pr_cont(" %pM", dev->dev_addr); From 698c33d8b48999bdec6671d664dbd926bb27e474 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:45 -0700 Subject: [PATCH 258/440] ethernet: aquantia: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Use an array on the stack, then call eth_hw_addr_set(). eth_hw_addr_set() is after error checking, this should be fine, error propagates all the way to failing probe. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 694aa70bcafe..1acf544afeb4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -300,6 +300,7 @@ static bool aq_nic_is_valid_ether_addr(const u8 *addr) int aq_nic_ndev_register(struct aq_nic_s *self) { + u8 addr[ETH_ALEN]; int err = 0; if (!self->ndev) { @@ -316,12 +317,13 @@ int aq_nic_ndev_register(struct aq_nic_s *self) #endif mutex_lock(&self->fwreq_mutex); - err = self->aq_fw_ops->get_mac_permanent(self->aq_hw, - self->ndev->dev_addr); + err = self->aq_fw_ops->get_mac_permanent(self->aq_hw, addr); mutex_unlock(&self->fwreq_mutex); if (err) goto err_exit; + eth_hw_addr_set(self->ndev, addr); + if (!is_valid_ether_addr(self->ndev->dev_addr) || !aq_nic_is_valid_ether_addr(self->ndev->dev_addr)) { netdev_warn(self->ndev, "MAC is invalid, will use random."); From a85c8f9ad2f650583a8b04f0d9c5dedf64e8d0f0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:46 -0700 Subject: [PATCH 259/440] ethernet: bnx2x: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 27e712178f95..aec666e97683 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11823,9 +11823,10 @@ static void bnx2x_get_mac_hwinfo(struct bnx2x *bp) u32 val, val2; int func = BP_ABS_FUNC(bp); int port = BP_PORT(bp); + u8 addr[ETH_ALEN] = {}; /* Zero primary MAC configuration */ - eth_zero_addr(bp->dev->dev_addr); + eth_hw_addr_set(bp->dev, addr); if (BP_NOMCP(bp)) { BNX2X_ERROR("warning: random MAC workaround active\n"); @@ -11834,8 +11835,10 @@ static void bnx2x_get_mac_hwinfo(struct bnx2x *bp) val2 = MF_CFG_RD(bp, func_mf_config[func].mac_upper); val = MF_CFG_RD(bp, func_mf_config[func].mac_lower); if ((val2 != FUNC_MF_CFG_UPPERMAC_DEFAULT) && - (val != FUNC_MF_CFG_LOWERMAC_DEFAULT)) - bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); + (val != FUNC_MF_CFG_LOWERMAC_DEFAULT)) { + bnx2x_set_mac_buf(addr, val, val2); + eth_hw_addr_set(bp->dev, addr); + } if (CNIC_SUPPORT(bp)) bnx2x_get_cnic_mac_hwinfo(bp); @@ -11843,7 +11846,8 @@ static void bnx2x_get_mac_hwinfo(struct bnx2x *bp) /* in SF read MACs from port configuration */ val2 = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_upper); val = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_lower); - bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); + bnx2x_set_mac_buf(addr, val, val2); + eth_hw_addr_set(bp->dev, addr); if (CNIC_SUPPORT(bp)) bnx2x_get_cnic_mac_hwinfo(bp); @@ -12291,7 +12295,9 @@ static int bnx2x_init_bp(struct bnx2x *bp) if (rc) return rc; } else { - eth_zero_addr(bp->dev->dev_addr); + static const u8 zero_addr[ETH_ALEN] = {}; + + eth_hw_addr_set(bp->dev, zero_addr); } bnx2x_set_modes_bitmap(bp); From 0c9e0c7931c610b9c2edce0a7a60b8de97e31885 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:47 -0700 Subject: [PATCH 260/440] ethernet: bcmgenet: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ed53859b6f7d..5da9c00b43b1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4085,8 +4085,12 @@ static int bcmgenet_probe(struct platform_device *pdev) eth_hw_addr_set(dev, pd->mac_address); else if (device_get_ethdev_address(&pdev->dev, dev)) - if (has_acpi_companion(&pdev->dev)) - bcmgenet_get_hw_addr(priv, dev->dev_addr); + if (has_acpi_companion(&pdev->dev)) { + u8 addr[ETH_ALEN]; + + bcmgenet_get_hw_addr(priv, addr); + eth_hw_addr_set(dev, addr); + } if (!is_valid_ether_addr(dev->dev_addr)) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); From 41edfff572d9ed0a9b8f8d136f2cfa213244746e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:48 -0700 Subject: [PATCH 261/440] ethernet: enic: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Use a zero'ed array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 66348cc3aaaf..aacf141986d5 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1098,6 +1098,7 @@ static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) static int enic_set_vf_port(struct net_device *netdev, int vf, struct nlattr *port[]) { + static const u8 zero_addr[ETH_ALEN] = {}; struct enic *enic = netdev_priv(netdev); struct enic_port_profile prev_pp; struct enic_port_profile *pp; @@ -1162,7 +1163,7 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, } else { memset(pp, 0, sizeof(*pp)); if (vf == PORT_SELF_VF) - eth_zero_addr(netdev->dev_addr); + eth_hw_addr_set(netdev, zero_addr); } } else { /* Set flag to indicate that the port assoc/disassoc @@ -1174,7 +1175,7 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, if (pp->request == PORT_REQUEST_DISASSOCIATE) { eth_zero_addr(pp->mac_addr); if (vf == PORT_SELF_VF) - eth_zero_addr(netdev->dev_addr); + eth_hw_addr_set(netdev, zero_addr); } } From 10e6ded812357ad5ae754bd994521beca091c0b4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:49 -0700 Subject: [PATCH 262/440] ethernet: ec_bhf: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Copy the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/ec_bhf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index b2d4fb3feb74..46e3a05e9582 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -479,6 +479,7 @@ static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id) struct net_device *net_dev; struct ec_bhf_priv *priv; void __iomem *dma_io; + u8 addr[ETH_ALEN]; void __iomem *io; int err = 0; @@ -539,7 +540,8 @@ static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id) if (err < 0) goto err_free_net_dev; - memcpy_fromio(net_dev->dev_addr, priv->mii_io + MII_MAC_ADDR, 6); + memcpy_fromio(addr, priv->mii_io + MII_MAC_ADDR, ETH_ALEN); + eth_hw_addr_set(net_dev, addr); err = register_netdev(net_dev); if (err < 0) From d9ca87233b688c2b6341fa5ee412711c2986d1da Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:50 -0700 Subject: [PATCH 263/440] ethernet: enetc: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Pass a netdev into the helper instead of just the address, read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_hw.h | 6 +++++- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 0f5f081a5baf..1514e6a4a3ff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -635,10 +635,14 @@ struct enetc_cmd_rfse { #define ENETC_RFSE_EN BIT(15) #define ENETC_RFSE_MODE_BD 2 -static inline void enetc_get_primary_mac_addr(struct enetc_hw *hw, u8 *addr) +static inline void enetc_load_primary_mac_addr(struct enetc_hw *hw, + struct net_device *ndev) { + u8 addr[ETH_ALEN] __aligned(4); + *(u32 *)addr = __raw_readl(hw->reg + ENETC_SIPMAR0); *(u16 *)(addr + 4) = __raw_readw(hw->reg + ENETC_SIPMAR1); + eth_hw_addr_set(ndev, addr); } #define ENETC_SI_INT_IDX 0 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 8281dd664f4e..ead2b93bf614 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -783,7 +783,7 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, } /* pick up primary MAC address from SI */ - enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); + enetc_load_primary_mac_addr(&si->hw, ndev); } static int enetc_mdio_probe(struct enetc_pf *pf, struct device_node *np) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index df312c9f8243..17924305afa2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -135,7 +135,7 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features |= NETIF_F_RXHASH; /* pick up primary MAC address from SI */ - enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); + enetc_load_primary_mac_addr(&si->hw, ndev); } static int enetc_vf_probe(struct pci_dev *pdev, From 5c8b348534acf7024587cfd55d747833d964b152 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:51 -0700 Subject: [PATCH 264/440] ethernet: ibmveth: use ether_addr_to_u64() We'll want to make netdev->dev_addr const, remove the local helper which is missing a const qualifier on the argument and use ether_addr_to_u64(). Similar story to mlx4. Signed-off-by: Jakub Kicinski Reviewed-by: Tyrel Datwyler Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmveth.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 836617fb3f40..45ba40cf4d07 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -483,17 +483,6 @@ retry: return rc; } -static u64 ibmveth_encode_mac_addr(u8 *mac) -{ - int i; - u64 encoded = 0; - - for (i = 0; i < ETH_ALEN; i++) - encoded = (encoded << 8) | mac[i]; - - return encoded; -} - static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); @@ -553,7 +542,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - mac_address = ibmveth_encode_mac_addr(netdev->dev_addr); + mac_address = ether_addr_to_u64(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len; @@ -1476,7 +1465,7 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ u64 mcast_addr; - mcast_addr = ibmveth_encode_mac_addr(ha->addr); + mcast_addr = ether_addr_to_u64(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1606,7 +1595,7 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - mac_address = ibmveth_encode_mac_addr(addr->sa_data); + mac_address = ether_addr_to_u64(addr->sa_data); rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); if (rc) { netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); From ec356edef78c46cbe6467649f26ff75e2ace953f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 15:16:52 -0700 Subject: [PATCH 265/440] ethernet: ixgb: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). ixgb_get_ee_mac_addr() is used with a non-nevdev->dev_addr pointer so we can't deal with the problem inside it. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5e1e2f0db82a..99d481904ce6 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -362,6 +362,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgb_adapter *adapter; static int cards_found = 0; int pci_using_dac; + u8 addr[ETH_ALEN]; int i; int err; @@ -461,7 +462,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr); + ixgb_get_ee_mac_addr(&adapter->hw, addr); + eth_hw_addr_set(netdev, addr); if (!is_valid_ether_addr(netdev->dev_addr)) { netif_err(adapter, probe, adapter->netdev, "Invalid MAC Address\n"); @@ -2227,6 +2229,7 @@ static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgb_adapter *adapter = netdev_priv(netdev); + u8 addr[ETH_ALEN]; if (pci_enable_device(pdev)) { netif_err(adapter, probe, adapter->netdev, @@ -2250,7 +2253,8 @@ static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev) "After reset, the EEPROM checksum is not valid\n"); return PCI_ERS_RESULT_DISCONNECT; } - ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr); + ixgb_get_ee_mac_addr(&adapter->hw, addr); + eth_hw_addr_set(netdev, addr); memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->perm_addr)) { From 24bcbe1cc69fa52dc4f7b5b2456678ed464724d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 06:37:39 -0700 Subject: [PATCH 266/440] net: stream: don't purge sk_error_queue in sk_stream_kill_queues() sk_stream_kill_queues() can be called on close when there are still outstanding skbs to transmit. Those skbs may try to queue notifications to the error queue (e.g. timestamps). If sk_stream_kill_queues() purges the queue without taking its lock the queue may get corrupted, and skbs leaked. This shows up as a warning about an rmem leak: WARNING: CPU: 24 PID: 0 at net/ipv4/af_inet.c:154 inet_sock_destruct+0x... The leak is always a multiple of 0x300 bytes (the value is in %rax on my builds, so RAX: 0000000000000300). 0x300 is truesize of an empty sk_buff. Indeed if we dump the socket state at the time of the warning the sk_error_queue is often (but not always) corrupted. The ->next pointer points back at the list head, but not the ->prev pointer. Indeed we can find the leaked skb by scanning the kernel memory for something that looks like an skb with ->sk = socket in question, and ->truesize = 0x300. The contents of ->cb[] of the skb confirms the suspicion that it is indeed a timestamp notification (as generated in __skb_complete_tx_timestamp()). Removing purging of sk_error_queue should be okay, since inet_sock_destruct() does it again once all socket refs are gone. Eric suggests this may cause sockets that go thru disconnect() to maintain notifications from the previous incarnations of the socket, but that should be okay since the race was there anyway, and disconnect() is not exactly dependable. Thanks to Jonathan Lemon and Omar Sandoval for help at various stages of tracing the issue. Fixes: cb9eff097831 ("net: new user space API for time stamping of incoming and outgoing packets") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/stream.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/stream.c b/net/core/stream.c index e09ffd410685..06b36c730ce8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -195,9 +195,6 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); - /* Next, the error queue. */ - __skb_queue_purge(&sk->sk_error_queue); - /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); From ed990df29f5b4ae720a9044958aaf589073eb26d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:43 +0200 Subject: [PATCH 267/440] net/smc: save stack space and allocate smc_init_info The struct smc_init_info grew over time, its time to save space on stack and allocate this struct dynamically. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 83 ++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 72f4b72eb175..760f5ed7c8f0 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -870,31 +870,37 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) struct smc_llc_msg_add_link *llc = &qentry->msg.add_link; enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; struct smc_link_group *lgr = smc_get_lgr(link); + struct smc_init_info *ini = NULL; struct smc_link *lnk_new = NULL; - struct smc_init_info ini; int lnk_idx, rc = 0; if (!llc->qp_mtu) goto out_reject; - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) { + rc = -ENOMEM; + goto out_reject; + } + + ini->vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) { - if (!ini.ib_dev) + if (!ini->ib_dev) goto out_reject; lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; } - if (!ini.ib_dev) { + if (!ini->ib_dev) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; - ini.ib_dev = link->smcibdev; - ini.ib_port = link->ibport; + ini->ib_dev = link->smcibdev; + ini->ib_port = link->ibport; } lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); if (lnk_idx < 0) goto out_reject; lnk_new = &lgr->lnk[lnk_idx]; - rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini); + rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini); if (rc) goto out_reject; smc_llc_save_add_link_info(lnk_new, llc); @@ -910,7 +916,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) goto out_clear_lnk; rc = smc_llc_send_add_link(link, - lnk_new->smcibdev->mac[ini.ib_port - 1], + lnk_new->smcibdev->mac[ini->ib_port - 1], lnk_new->gid, lnk_new, SMC_LLC_RESP); if (rc) goto out_clear_lnk; @@ -919,7 +925,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) rc = 0; goto out_clear_lnk; } - rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); + rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t); if (!rc) goto out; out_clear_lnk: @@ -928,6 +934,7 @@ out_clear_lnk: out_reject: smc_llc_cli_add_link_reject(qentry); out: + kfree(ini); kfree(qentry); return rc; } @@ -937,20 +944,25 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link, struct smc_llc_qentry *qentry) { struct smc_link_group *lgr = smc_get_lgr(link); - struct smc_init_info ini; + struct smc_init_info *ini = NULL; if (lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) goto out; - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); - if (!ini.ib_dev) + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) goto out; - smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], - ini.ib_gid, NULL, SMC_LLC_REQ); + ini->vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); + if (!ini->ib_dev) + goto out; + + smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1], + ini->ib_gid, NULL, SMC_LLC_REQ); out: + kfree(ini); kfree(qentry); } @@ -1158,28 +1170,34 @@ int smc_llc_srv_add_link(struct smc_link *link) struct smc_link_group *lgr = link->lgr; struct smc_llc_msg_add_link *add_llc; struct smc_llc_qentry *qentry = NULL; - struct smc_link *link_new; - struct smc_init_info ini; + struct smc_link *link_new = NULL; + struct smc_init_info *ini; int lnk_idx, rc = 0; + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) + return -ENOMEM; + /* ignore client add link recommendation, start new flow */ - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); - if (!ini.ib_dev) { + ini->vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); + if (!ini->ib_dev) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; - ini.ib_dev = link->smcibdev; - ini.ib_port = link->ibport; + ini->ib_dev = link->smcibdev; + ini->ib_port = link->ibport; } lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); - if (lnk_idx < 0) - return 0; + if (lnk_idx < 0) { + rc = 0; + goto out; + } - rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini); + rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini); if (rc) - return rc; + goto out; link_new = &lgr->lnk[lnk_idx]; rc = smc_llc_send_add_link(link, - link_new->smcibdev->mac[ini.ib_port - 1], + link_new->smcibdev->mac[ini->ib_port - 1], link_new->gid, link_new, SMC_LLC_REQ); if (rc) goto out_err; @@ -1218,10 +1236,15 @@ int smc_llc_srv_add_link(struct smc_link *link) rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); if (rc) goto out_err; + kfree(ini); return 0; out_err: - link_new->state = SMC_LNK_INACTIVE; - smcr_link_clear(link_new, false); + if (link_new) { + link_new->state = SMC_LNK_INACTIVE; + smcr_link_clear(link_new, false); + } +out: + kfree(ini); return rc; } From 42042dbbc2ebb926e594b22490374d9343c746ef Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:44 +0200 Subject: [PATCH 268/440] net/smc: prepare for SMC-Rv2 connection Prepare the connection establishment with SMC-Rv2. Detect eligible RoCE cards and indicate all supported SMC modes for the connection. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 97 +++++++++++++++++++++++++++++----------------- net/smc/smc_clc.c | 11 ++++++ net/smc/smc_clc.h | 12 ++++++ net/smc/smc_core.h | 28 +++++++++++++ 4 files changed, 113 insertions(+), 35 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f69ef3f2019f..f21e74537f53 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -608,7 +608,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) * used for the internal TCP socket */ smc_pnet_find_roce_resource(smc->clcsock->sk, ini); - if (!ini->ib_dev) + if (!ini->check_smcrv2 && !ini->ib_dev) + return SMC_CLC_DECL_NOSMCRDEV; + if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2) return SMC_CLC_DECL_NOSMCRDEV; return 0; } @@ -692,27 +694,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc, int rc = 0; /* check if there is an ism device available */ - if (ini->smcd_version & SMC_V1) { - if (smc_find_ism_device(smc, ini) || - smc_connect_ism_vlan_setup(smc, ini)) { - if (ini->smc_type_v1 == SMC_TYPE_B) - ini->smc_type_v1 = SMC_TYPE_R; - else - ini->smc_type_v1 = SMC_TYPE_N; - } /* else ISM V1 is supported for this connection */ - if (smc_find_rdma_device(smc, ini)) { - if (ini->smc_type_v1 == SMC_TYPE_B) - ini->smc_type_v1 = SMC_TYPE_D; - else - ini->smc_type_v1 = SMC_TYPE_N; - } /* else RDMA is supported for this connection */ - } - if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) - ini->smc_type_v2 = SMC_TYPE_N; + if (!(ini->smcd_version & SMC_V1) || + smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)) + ini->smcd_version &= ~SMC_V1; + /* else ISM V1 is supported for this connection */ + + /* check if there is an rdma device available */ + if (!(ini->smcr_version & SMC_V1) || + smc_find_rdma_device(smc, ini)) + ini->smcr_version &= ~SMC_V1; + /* else RDMA is supported for this connection */ + + ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1, + ini->smcr_version & SMC_V1); + + /* check if there is an ism v2 device available */ + if (!(ini->smcd_version & SMC_V2) || + !smc_ism_is_v2_capable() || + smc_find_ism_v2_device_clnt(smc, ini)) + ini->smcd_version &= ~SMC_V2; + + /* check if there is an rdma v2 device available */ + ini->check_smcrv2 = true; + ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; + if (!(ini->smcr_version & SMC_V2) || + smc->clcsock->sk->sk_family != AF_INET || + !smc_clc_ueid_count() || + smc_find_rdma_device(smc, ini)) + ini->smcr_version &= ~SMC_V2; + ini->check_smcrv2 = false; + + ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2, + ini->smcr_version & SMC_V2); /* if neither ISM nor RDMA are supported, fallback */ - if (!smcr_indicated(ini->smc_type_v1) && - ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) + if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) rc = SMC_CLC_DECL_NOSMCDEV; return rc; @@ -950,17 +967,24 @@ connect_abort: static int smc_connect_check_aclc(struct smc_init_info *ini, struct smc_clc_msg_accept_confirm *aclc) { - if ((aclc->hdr.typev1 == SMC_TYPE_R && - !smcr_indicated(ini->smc_type_v1)) || - (aclc->hdr.typev1 == SMC_TYPE_D && - ((!smcd_indicated(ini->smc_type_v1) && - !smcd_indicated(ini->smc_type_v2)) || - (aclc->hdr.version == SMC_V1 && - !smcd_indicated(ini->smc_type_v1)) || - (aclc->hdr.version == SMC_V2 && - !smcd_indicated(ini->smc_type_v2))))) + if (aclc->hdr.typev1 != SMC_TYPE_R && + aclc->hdr.typev1 != SMC_TYPE_D) return SMC_CLC_DECL_MODEUNSUPP; + if (aclc->hdr.version >= SMC_V2) { + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v2)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + !smcd_indicated(ini->smc_type_v2))) + return SMC_CLC_DECL_MODEUNSUPP; + } else { + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v1)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + !smcd_indicated(ini->smc_type_v1))) + return SMC_CLC_DECL_MODEUNSUPP; + } + return 0; } @@ -991,14 +1015,15 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, version); - ini->smcd_version = SMC_V1; - ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; + ini->smcd_version = SMC_V1 | SMC_V2; + ini->smcr_version = SMC_V1 | SMC_V2; ini->smc_type_v1 = SMC_TYPE_B; - ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N; + ini->smc_type_v2 = SMC_TYPE_B; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { ini->smcd_version &= ~SMC_V1; + ini->smcr_version = 0; ini->smc_type_v1 = SMC_TYPE_N; if (!ini->smcd_version) { rc = SMC_CLC_DECL_GETVLANERR; @@ -1026,15 +1051,17 @@ static int __smc_connect(struct smc_sock *smc) /* check if smc modes and versions of CLC proposal and accept match */ rc = smc_connect_check_aclc(ini, aclc); version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; - ini->smcd_version = version; if (rc) goto vlan_cleanup; /* depending on previous steps, connect using rdma or ism */ - if (aclc->hdr.typev1 == SMC_TYPE_R) + if (aclc->hdr.typev1 == SMC_TYPE_R) { + ini->smcr_version = version; rc = smc_connect_rdma(smc, aclc, ini); - else if (aclc->hdr.typev1 == SMC_TYPE_D) + } else if (aclc->hdr.typev1 == SMC_TYPE_D) { + ini->smcd_version = version; rc = smc_connect_ism(smc, aclc, ini); + } if (rc) goto vlan_cleanup; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1cc8a76b39f9..8d44f06cf401 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -114,6 +114,17 @@ err_out: return rc; } +int smc_clc_ueid_count(void) +{ + int count; + + read_lock(&smc_clc_eid_table.lock); + count = smc_clc_eid_table.ueid_cnt; + read_unlock(&smc_clc_eid_table.lock); + + return count; +} + int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 974d01d16bb5..37ce97f7fdb0 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -282,6 +282,17 @@ static inline bool smcd_indicated(int smc_type) return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; } +static inline u8 smc_indicated_type(int is_smcd, int is_smcr) +{ + if (is_smcd && is_smcr) + return SMC_TYPE_B; + if (is_smcd) + return SMC_TYPE_D; + if (is_smcr) + return SMC_TYPE_R; + return SMC_TYPE_N; +} + /* get SMC-D info from proposal message */ static inline struct smc_clc_msg_smcd * smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) @@ -343,6 +354,7 @@ void smc_clc_get_hostname(u8 **host); bool smc_clc_match_eid(u8 *negotiated_eid, struct smc_clc_v2_extension *smc_v2_ext, u8 *peer_eid, u8 *local_eid); +int smc_clc_ueid_count(void); int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info); int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 83d30b06016f..4a1778a7e3a5 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -302,6 +302,31 @@ struct smc_link_group { struct smc_clc_msg_local; +#define GID_LIST_SIZE 2 + +struct smc_gidlist { + u8 len; + u8 list[GID_LIST_SIZE][SMC_GID_SIZE]; +}; + +struct smc_init_info_smcrv2 { + /* Input fields */ + __be32 saddr; + struct sock *clc_sk; + __be32 daddr; + + /* Output fields when saddr is set */ + struct smc_ib_device *ib_dev_v2; + u8 ib_port_v2; + u8 ib_gid_v2[SMC_GID_SIZE]; + + /* Additional output fields when clc_sk and daddr is set as well */ + u8 uses_gateway; + u8 nexthop_mac[ETH_ALEN]; + + struct smc_gidlist gidlist; +}; + struct smc_init_info { u8 is_smcd; u8 smc_type_v1; @@ -313,10 +338,13 @@ struct smc_init_info { u8 negotiated_eid[SMC_MAX_EID_LEN]; /* SMC-R */ struct smc_clc_msg_local *ib_lcl; + u8 smcr_version; + u8 check_smcrv2; struct smc_ib_device *ib_dev; u8 ib_gid[SMC_GID_SIZE]; u8 ib_port; u32 ib_clcqpn; + struct smc_init_info_smcrv2 smcrv2; /* SMC-D */ u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; From e5c4744cfb598f98672f8d21d59ef2c1fa9c9b5f Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:45 +0200 Subject: [PATCH 269/440] net/smc: add SMC-Rv2 connection establishment Send a CLC proposal message, and the remote side process this type of message and determine the target GID. Check for a valid route to this GID, and complete the connection establishment. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 169 +++++++++++++++++++++++++++++++++++---------- net/smc/smc_clc.c | 89 +++++++++++++++++++----- net/smc/smc_clc.h | 24 +++++-- net/smc/smc_core.h | 6 ++ net/smc/smc_ib.c | 27 ++++++++ net/smc/smc_ib.h | 13 ++++ 6 files changed, 267 insertions(+), 61 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f21e74537f53..d4280262b829 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) return 0; } +static bool smc_isascii(char *hostname) +{ + int i; + + for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) + if (!isascii(hostname[i])) + return false; + return true; +} + +static void smc_conn_save_peer_info_fce(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)clc; + struct smc_clc_first_contact_ext *fce; + int clc_v2_len; + + if (clc->hdr.version == SMC_V1 || + !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + return; + + if (smc->conn.lgr->is_smcd) { + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid, + SMC_MAX_EID_LEN); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, + d1); + } else { + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid, + SMC_MAX_EID_LEN); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, + r1); + } + fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len); + smc->conn.lgr->peer_os = fce->os_type; + smc->conn.lgr->peer_smc_release = fce->release; + if (smc_isascii(fce->hostname)) + memcpy(smc->conn.lgr->peer_hostname, fce->hostname, + SMC_MAX_HOSTNAME_LEN); +} + static void smcr_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { @@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } -static bool smc_isascii(char *hostname) -{ - int i; - - for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) - if (!isascii(hostname[i])) - return false; - return true; -} - static void smcd_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { @@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; - if (clc->hdr.version > SMC_V1 && - (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)clc; - struct smc_clc_first_contact_ext *fce = - (struct smc_clc_first_contact_ext *) - (((u8 *)clc_v2) + sizeof(*clc_v2)); - - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid, - SMC_MAX_EID_LEN); - smc->conn.lgr->peer_os = fce->os_type; - smc->conn.lgr->peer_smc_release = fce->release; - if (smc_isascii(fce->hostname)) - memcpy(smc->conn.lgr->peer_hostname, fce->hostname, - SMC_MAX_HOSTNAME_LEN); - } } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, smcd_conn_save_peer_info(smc, clc); else smcr_conn_save_peer_info(smc, clc); + smc_conn_save_peer_info_fce(smc, clc); } static void smc_link_save_peer_info(struct smc_link *link, - struct smc_clc_msg_accept_confirm *clc) + struct smc_clc_msg_accept_confirm *clc, + struct smc_init_info *ini) { link->peer_qpn = ntoh24(clc->r0.qpn); - memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE); - memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); + memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE); + memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac)); link->peer_psn = ntoh24(clc->r0.psn); link->peer_mtu = clc->r0.qp_mtu; } @@ -769,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } +static void smc_fill_gid_list(struct smc_link_group *lgr, + struct smc_gidlist *gidlist, + struct smc_ib_device *known_dev, u8 *known_gid) +{ + struct smc_init_info *alt_ini = NULL; + + memset(gidlist, 0, sizeof(*gidlist)); + memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE); + + alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL); + if (!alt_ini) + goto out; + + alt_ini->vlan_id = lgr->vlan_id; + alt_ini->check_smcrv2 = true; + alt_ini->smcrv2.saddr = lgr->saddr; + smc_pnet_find_alt_roce(lgr, alt_ini, known_dev); + + if (!alt_ini->smcrv2.ib_dev_v2) + goto out; + + memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2, + SMC_GID_SIZE); + +out: + kfree(alt_ini); +} + +static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smc_init_info *ini) +{ + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + struct smc_clc_first_contact_ext *fce = + (struct smc_clc_first_contact_ext *) + (((u8 *)clc_v2) + sizeof(*clc_v2)); + + if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) + return 0; + + if (fce->v2_direct) { + memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); + ini->smcrv2.uses_gateway = false; + } else { + if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, + smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), + ini->smcrv2.nexthop_mac, + &ini->smcrv2.uses_gateway)) + return SMC_CLC_DECL_NOROUTE; + if (!ini->smcrv2.uses_gateway) { + /* mismatch: peer claims indirect, but its direct */ + return SMC_CLC_DECL_NOINDIRECT; + } + } + return 0; +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, @@ -776,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc, { int i, reason_code = 0; struct smc_link *link; + u8 *eid = NULL; ini->is_smcd = false; - ini->ib_lcl = &aclc->r0.lcl; ini->ib_clcqpn = ntoh24(aclc->r0.qpn); ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; + memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE); + memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN); + + reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini); + if (reason_code) + return reason_code; mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); @@ -802,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc, if (l->peer_qpn == ntoh24(aclc->r0.qpn) && !memcmp(l->peer_gid, &aclc->r0.lcl.gid, SMC_GID_SIZE) && - !memcmp(l->peer_mac, &aclc->r0.lcl.mac, - sizeof(l->peer_mac))) { + (aclc->hdr.version > SMC_V1 || + !memcmp(l->peer_mac, &aclc->r0.lcl.mac, + sizeof(l->peer_mac)))) { link = l; break; } @@ -822,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (ini->first_contact_local) - smc_link_save_peer_info(link, aclc); + smc_link_save_peer_info(link, aclc, ini); if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) { reason_code = SMC_CLC_DECL_ERR_RTOK; @@ -845,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc, } smc_rmb_sync_sg_for_device(&smc->conn); + if (aclc->hdr.version > SMC_V1) { + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + + eid = clc_v2->r1.eid; + if (ini->first_contact_local) + smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, + link->smcibdev, link->gid); + } + reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, - SMC_V1, NULL); + aclc->hdr.version, eid, ini); if (reason_code) goto connect_abort; @@ -886,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->chid)) { + if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } @@ -940,11 +1033,11 @@ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)aclc; - eid = clc_v2->eid; + eid = clc_v2->d1.eid; } rc = smc_clc_send_confirm(smc, ini->first_contact_local, - aclc->hdr.version, eid); + aclc->hdr.version, eid, NULL); if (rc) goto connect_abort; mutex_unlock(&smc_server_lgr_pending); @@ -1735,7 +1828,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, int reason_code = 0; if (local_first) - smc_link_save_peer_info(link, cclc); + smc_link_save_peer_info(link, cclc, NULL); if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) return SMC_CLC_DECL_ERR_RTOK; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8d44f06cf401..5cc2e2dc7417 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -31,6 +31,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 +#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ @@ -309,7 +310,8 @@ bool smc_clc_match_eid(u8 *negotiated_eid, negotiated_eid[0] = 0; read_lock(&smc_clc_eid_table.lock); - if (smc_clc_eid_table.seid_enabled && + if (peer_eid && local_eid && + smc_clc_eid_table.seid_enabled && smc_v2_ext->hdr.flag.seid && !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); @@ -391,6 +393,9 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + sizeof(struct smc_clc_first_contact_ext))) return false; + if (hdr->typev1 == SMC_TYPE_R && + ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) + return false; } return true; } @@ -844,8 +849,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) } else { struct smc_clc_eid_entry *ueident; u16 v2_ext_offset; - u8 *eid = NULL; + v2_ext->hdr.flag.release = SMC_RELEASE; v2_ext_offset = sizeof(*pclc_smcd) - offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); if (ini->smc_type_v1 != SMC_TYPE_N) @@ -853,6 +858,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); pclc_smcd->v2_ext_offset = htons(v2_ext_offset); + plen += sizeof(*v2_ext); read_lock(&smc_clc_eid_table.lock); v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; @@ -862,10 +868,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) memcpy(v2_ext->user_eids[i++], ueident->eid, sizeof(ueident->eid)); } - v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; read_unlock(&smc_clc_eid_table.lock); + } + if (smcd_indicated(ini->smc_type_v2)) { + u8 *eid = NULL; + + v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; - v2_ext->hdr.flag.release = SMC_RELEASE; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + @@ -873,7 +882,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) smc_ism_get_system_eid(&eid); if (eid && v2_ext->hdr.flag.seid) memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); - plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); + plen += sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { gidchids[i - 1].gid = @@ -885,6 +894,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) sizeof(struct smc_clc_smcd_gid_chid); } } + if (smcr_indicated(ini->smc_type_v2)) + memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); + pclc_base->hdr.length = htons(plen); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -908,12 +920,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) vec[i].iov_base = v2_ext; vec[i++].iov_len = sizeof(*v2_ext) + (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); - vec[i].iov_base = smcd_v2_ext; - vec[i++].iov_len = sizeof(*smcd_v2_ext); - if (ini->ism_offered_cnt) { - vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + if (smcd_indicated(ini->smc_type_v2)) { + vec[i].iov_base = smcd_v2_ext; + vec[i++].iov_len = sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + vec[i].iov_base = gidchids; + vec[i++].iov_len = ini->ism_offered_cnt * sizeof(struct smc_clc_smcd_gid_chid); + } } } vec[i].iov_base = trl; @@ -936,13 +950,14 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *clc_v2, int first_contact, u8 version, - u8 *eid) + u8 *eid, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_first_contact_ext fce; + struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - struct kvec vec[3]; + struct kvec vec[5]; struct msghdr msg; int i, len; @@ -964,9 +979,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); - if (eid[0]) - memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); + clc_v2->d1.chid = + htons(smc_ism_get_chid(conn->lgr->smcd)); + if (eid && eid[0]) + memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) smc_clc_fill_fce(&fce, &len); @@ -1005,6 +1021,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + } else { + if (eid && eid[0]) + memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); + len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + smc_clc_fill_fce(&fce, &len); + fce.v2_direct = !link->lgr->uses_gateway; + memset(&gle, 0, sizeof(gle)); + if (ini && clc->hdr.type == SMC_CLC_CONFIRM) { + gle.gid_cnt = ini->smcrv2.gidlist.len; + len += sizeof(gle); + len += gle.gid_cnt * sizeof(gle.gid[0]); + } else { + len += sizeof(gle.reserved); + } + } + clc_v2->hdr.length = htons(len); + } memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } @@ -1012,7 +1048,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, i = 0; vec[i].iov_base = clc_v2; if (version > SMC_V1) - vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); + vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? + SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : + SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) - + sizeof(trl); else vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN : @@ -1021,6 +1060,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version > SMC_V1 && first_contact) { vec[i].iov_base = &fce; vec[i++].iov_len = sizeof(fce); + if (!conn->lgr->is_smcd) { + if (clc->hdr.type == SMC_CLC_CONFIRM) { + vec[i].iov_base = &gle; + vec[i++].iov_len = sizeof(gle); + vec[i].iov_base = &ini->smcrv2.gidlist.list; + vec[i++].iov_len = gle.gid_cnt * + sizeof(gle.gid[0]); + } else { + vec[i].iov_base = &gle.reserved; + vec[i++].iov_len = sizeof(gle.reserved); + } + } } vec[i].iov_base = &trl; vec[i++].iov_len = sizeof(trl); @@ -1030,7 +1081,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, /* send CLC CONFIRM message across internal TCP socket */ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version, u8 *eid) + u8 version, u8 *eid, struct smc_init_info *ini) { struct smc_clc_msg_accept_confirm_v2 cclc_v2; int reason_code = 0; @@ -1040,7 +1091,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, memset(&cclc_v2, 0, sizeof(cclc_v2)); cclc_v2.hdr.type = SMC_CLC_CONFIRM; len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, - version, eid); + version, eid, ini); if (len < ntohs(cclc_v2.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; @@ -1063,7 +1114,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, memset(&aclc_v2, 0, sizeof(aclc_v2)); aclc_v2.hdr.type = SMC_CLC_ACCEPT; len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, - version, negotiated_eid); + version, negotiated_eid, NULL); if (len < ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 37ce97f7fdb0..4f2fe278f404 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -54,6 +54,8 @@ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ +#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */ +#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ @@ -213,11 +215,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ #define SMC_CLC_OS_AIX 3 struct smc_clc_first_contact_ext { - u8 reserved1; #if defined(__BIG_ENDIAN_BITFIELD) + u8 v2_direct : 1, + reserved : 7; u8 os_type : 4, release : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 7, + v2_direct : 1; u8 release : 4, os_type : 4; #endif @@ -225,6 +230,13 @@ struct smc_clc_first_contact_ext { u8 hostname[SMC_MAX_HOSTNAME_LEN]; }; +struct smc_clc_fce_gid_ext { + u8 reserved[16]; + u8 gid_cnt; + u8 reserved2[3]; + u8 gid[][SMC_GID_SIZE]; +}; + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { @@ -239,13 +251,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ + struct { /* SMC-R */ + struct smcr_clc_msg_accept_confirm r0; + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved6[8]; + } r1; struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; __be16 chid; u8 eid[SMC_MAX_EID_LEN]; u8 reserved5[8]; - }; + } d1; }; }; @@ -345,7 +361,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version, u8 *eid); + u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version, u8 *negotiated_eid); void smc_clc_init(void) __init; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 4a1778a7e3a5..128474f33b8f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -288,6 +288,9 @@ struct smc_link_group { /* link keep alive time */ u32 llc_termination_rsn; /* rsn code for termination */ + u8 nexthop_mac[ETH_ALEN]; + u8 uses_gateway; + __be32 saddr; }; struct { /* SMC-D */ u64 peer_gid; @@ -340,6 +343,9 @@ struct smc_init_info { struct smc_clc_msg_local *ib_lcl; u8 smcr_version; u8 check_smcrv2; + u8 peer_gid[SMC_GID_SIZE]; + u8 peer_mac[ETH_ALEN]; + u8 peer_systemid[SMC_SYSTEMID_LEN]; struct smc_ib_device *ib_dev; u8 ib_gid[SMC_GID_SIZE]; u8 ib_port; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index a8845343d183..9f72910af1d0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -183,6 +183,33 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } +int smc_ib_find_route(__be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway) +{ + struct neighbour *neigh = NULL; + struct rtable *rt = NULL; + struct flowi4 fl4 = { + .saddr = saddr, + .daddr = daddr + }; + + if (daddr == cpu_to_be32(INADDR_NONE)) + goto out; + rt = ip_route_output_flow(&init_net, &fl4, NULL); + if (IS_ERR(rt)) + goto out; + if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) + goto out; + neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr); + if (neigh) { + memcpy(nexthop_mac, neigh->ha, ETH_ALEN); + *uses_gateway = rt->rt_uses_gateway; + return 0; + } +out: + return -ENOENT; +} + /* determine the gid for an ib-device port and vlan id */ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 3085f5180da7..c55cbd7be67a 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */ int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ }; +static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE]) +{ + struct in6_addr *addr6 = (struct in6_addr *)gid; + + if (ipv6_addr_v4mapped(addr6) || + !(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2])) + return addr6->s6_addr32[3]; + return cpu_to_be32(INADDR_NONE); +} + +struct smc_init_info_smcrv2; struct smc_buf_desc; struct smc_link; @@ -91,6 +102,8 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, enum dma_data_direction data_direction); int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index); +int smc_ib_find_route(__be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); #endif From e49300a6bf6218c835403545e9356141a6340181 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:46 +0200 Subject: [PATCH 270/440] net/smc: add listen processing for SMC-Rv2 Implement the server side of the SMC-Rv2 processing. Process incoming CLC messages, find eligible devices and check for a valid route to the remote peer. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 165 ++++++++++++++++++++++++++++++++------------- net/smc/smc_clc.h | 1 + net/smc/smc_core.c | 64 +++++++++++++----- net/smc/smc_core.h | 1 - 4 files changed, 165 insertions(+), 66 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d4280262b829..ce5feb68382f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1515,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, ini->smc_type_v1 = pclc->hdr.typev1; ini->smc_type_v2 = pclc->hdr.typev2; - ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; - if (pclc->hdr.version > SMC_V1) - ini->smcd_version |= - ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; - if (!(ini->smcd_version & SMC_V2)) { - rc = SMC_CLC_DECL_PEERNOSMC; - goto out; + ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0; + ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0; + if (pclc->hdr.version > SMC_V1) { + if (smcd_indicated(ini->smc_type_v2)) + ini->smcd_version |= SMC_V2; + if (smcr_indicated(ini->smc_type_v2)) + ini->smcr_version |= SMC_V2; } - if (!smc_ism_is_v2_capable()) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOISM2SUPP; + if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) { + rc = SMC_CLC_DECL_PEERNOSMC; goto out; } pclc_v2_ext = smc_get_clc_v2_ext(pclc); if (!pclc_v2_ext) { ini->smcd_version &= ~SMC_V2; + ini->smcr_version &= ~SMC_V2; rc = SMC_CLC_DECL_NOV2EXT; goto out; } pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); - if (!pclc_smcd_v2_ext) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOV2DEXT; + if (ini->smcd_version & SMC_V2) { + if (!smc_ism_is_v2_capable()) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOISM2SUPP; + } else if (!pclc_smcd_v2_ext) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOV2DEXT; + } else if (!pclc_v2_ext->hdr.eid_cnt && + !pclc_v2_ext->hdr.flag.seid) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOUEID; + } + } + if (ini->smcr_version & SMC_V2) { + if (!pclc_v2_ext->hdr.eid_cnt) { + ini->smcr_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOUEID; + } } out: - if (!ini->smcd_version) + if (!ini->smcd_version && !ini->smcr_version) return rc; return 0; @@ -1661,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, pclc_smcd = smc_get_clc_msg_smcd(pclc); smc_v2_ext = smc_get_clc_v2_ext(pclc); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); - if (!smcd_v2_ext) { - smc_find_ism_store_rc(SMC_CLC_DECL_NOV2DEXT, ini); - goto not_found; - } mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) @@ -1682,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, } mutex_unlock(&smcd_dev_list.mutex); - if (!ini->ism_dev[0]) + if (!ini->ism_dev[0]) { + smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); goto not_found; + } smc_ism_get_system_eid(&eid); if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, @@ -1736,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, not_found: smc_find_ism_store_rc(rc, ini); + ini->smcd_version &= ~SMC_V1; ini->ism_dev[0] = NULL; ini->is_smcd = false; } @@ -1754,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) return 0; } +static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_v2_extension *smc_v2_ext; + u8 smcr_version; + int rc; + + if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2)) + goto not_found; + + smc_v2_ext = smc_get_clc_v2_ext(pclc); + if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL)) + goto not_found; + + /* prepare RDMA check */ + memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE); + memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); + ini->check_smcrv2 = true; + ini->smcrv2.clc_sk = new_smc->clcsock->sk; + ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr; + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce); + rc = smc_find_rdma_device(new_smc, ini); + if (rc) { + smc_find_ism_store_rc(rc, ini); + goto not_found; + } + if (!ini->smcrv2.uses_gateway) + memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN); + + smcr_version = ini->smcr_version; + ini->smcr_version = SMC_V2; + rc = smc_listen_rdma_init(new_smc, ini); + if (!rc) + rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local); + if (!rc) + return; + ini->smcr_version = smcr_version; + smc_find_ism_store_rc(rc, ini); + +not_found: + ini->smcr_version &= ~SMC_V2; + ini->check_smcrv2 = false; +} + static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { int rc; - if (!smcr_indicated(ini->smc_type_v1)) + if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1)) return SMC_CLC_DECL_NOSMCDEV; /* prepare RDMA check */ - ini->ib_lcl = &pclc->lcl; + memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE); + memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); rc = smc_find_rdma_device(new_smc, ini); if (rc) { /* no RDMA device found */ - if (ini->smc_type_v1 == SMC_TYPE_B) - /* neither ISM nor RDMA device found */ - rc = SMC_CLC_DECL_NOSMCDEV; - return rc; + return SMC_CLC_DECL_NOSMCDEV; } rc = smc_listen_rdma_init(new_smc, ini); if (rc) @@ -1784,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { - int rc; + int prfx_rc; /* check for ISM device matching V2 proposed device */ smc_find_ism_v2_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - if (!(ini->smcd_version & SMC_V1)) - return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV; - - /* check for matching IP prefix and subnet length */ - rc = smc_listen_prfx_check(new_smc, pclc); - if (rc) - return ini->rc ?: rc; + /* check for matching IP prefix and subnet length (V1) */ + prfx_rc = smc_listen_prfx_check(new_smc, pclc); + if (prfx_rc) + smc_find_ism_store_rc(prfx_rc, ini); /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) return ini->rc ?: SMC_CLC_DECL_GETVLANERR; /* check for ISM device matching V1 proposed device */ - smc_find_ism_v1_device_serv(new_smc, pclc, ini); + if (!prfx_rc) + smc_find_ism_v1_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - if (pclc->hdr.typev1 == SMC_TYPE_D) + if (!smcr_indicated(pclc->hdr.typev1) && + !smcr_indicated(pclc->hdr.typev2)) /* skip RDMA and decline */ return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV; - /* check if RDMA is available */ - rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); - smc_find_ism_store_rc(rc, ini); + /* check if RDMA V2 is available */ + smc_find_rdma_v2_device_serv(new_smc, pclc, ini); + if (ini->smcrv2.ib_dev_v2) + return 0; - return (!rc) ? 0 : ini->rc; + /* check if RDMA V1 is available */ + if (!prfx_rc) { + int rc; + + rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); + smc_find_ism_store_rc(rc, ini); + return (!rc) ? 0 : ini->rc; + } + return SMC_CLC_DECL_NOSMCDEV; } /* listen worker: finish RDMA setup */ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, - bool local_first) + bool local_first, + struct smc_init_info *ini) { struct smc_link *link = new_smc->conn.lnk; int reason_code = 0; if (local_first) - smc_link_save_peer_info(link, cclc, NULL); + smc_link_save_peer_info(link, cclc, ini); if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) return SMC_CLC_DECL_ERR_RTOK; @@ -1849,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); - u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal *pclc; struct smc_init_info *ini = NULL; + u8 proposal_version = SMC_V1; + u8 accept_version; int rc = 0; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) @@ -1885,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work) SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (rc) goto out_decl; - version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; + + if (pclc->hdr.version > SMC_V1) + proposal_version = SMC_V2; /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(new_smc)) { @@ -1915,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work) goto out_unlock; /* send SMC Accept CLC message */ + accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version; rc = smc_clc_send_accept(new_smc, ini->first_contact_local, - ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1, - ini->negotiated_eid); + accept_version, ini->negotiated_eid); if (rc) goto out_unlock; @@ -1939,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ini->is_smcd) { rc = smc_listen_rdma_finish(new_smc, cclc, - ini->first_contact_local); + ini->first_contact_local, ini); if (rc) goto out_unlock; mutex_unlock(&smc_server_lgr_pending); @@ -1953,7 +2024,7 @@ out_unlock: mutex_unlock(&smc_server_lgr_pending); out_decl: smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0, - version); + proposal_version); out_free: kfree(ini); kfree(buf); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 4f2fe278f404..22c079ed77a9 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,6 +44,7 @@ #define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */ #define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ #define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ +#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4d463701a759..a081582e5669 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -684,24 +684,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link) int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini) { + struct smc_ib_device *smcibdev; u8 rndvec[3]; int rc; - get_device(&ini->ib_dev->ibdev->dev); - atomic_inc(&ini->ib_dev->lnk_cnt); + if (lgr->smc_version == SMC_V2) { + lnk->smcibdev = ini->smcrv2.ib_dev_v2; + lnk->ibport = ini->smcrv2.ib_port_v2; + } else { + lnk->smcibdev = ini->ib_dev; + lnk->ibport = ini->ib_port; + } + get_device(&lnk->smcibdev->ibdev->dev); + atomic_inc(&lnk->smcibdev->lnk_cnt); + lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; lnk->link_idx = link_idx; - lnk->smcibdev = ini->ib_dev; - lnk->ibport = ini->ib_port; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); - lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); - if (!ini->ib_dev->initialized) { - rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); + if (!lnk->smcibdev->initialized) { + rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev); if (rc) goto out; } @@ -740,11 +746,12 @@ clear_llc_lnk: smc_llc_link_clear(lnk, false); out: smc_ibdev_cnt_dec(lnk); - put_device(&ini->ib_dev->ibdev->dev); + put_device(&lnk->smcibdev->ibdev->dev); + smcibdev = lnk->smcibdev; memset(lnk, 0, sizeof(struct smc_link)); lnk->state = SMC_LNK_UNUSED; - if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) - wake_up(&ini->ib_dev->lnks_deleted); + if (!atomic_dec_return(&smcibdev->lnk_cnt)) + wake_up(&smcibdev->lnks_deleted); return rc; } @@ -808,10 +815,25 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); } else { /* SMC-R specific settings */ + struct smc_ib_device *ibdev; + int ibport; + lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, + lgr->smc_version = ini->smcr_version; + memcpy(lgr->peer_systemid, ini->peer_systemid, SMC_SYSTEMID_LEN); - memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], + if (lgr->smc_version == SMC_V2) { + ibdev = ini->smcrv2.ib_dev_v2; + ibport = ini->smcrv2.ib_port_v2; + lgr->saddr = ini->smcrv2.saddr; + lgr->uses_gateway = ini->smcrv2.uses_gateway; + memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac, + ETH_ALEN); + } else { + ibdev = ini->ib_dev; + ibport = ini->ib_port; + } + memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); smc_llc_lgr_init(lgr, smc); @@ -1636,13 +1658,15 @@ out: return rc; } -static bool smcr_lgr_match(struct smc_link_group *lgr, - struct smc_clc_msg_local *lcl, +static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, + u8 peer_systemid[], + u8 peer_gid[], + u8 peer_mac_v1[], enum smc_lgr_role role, u32 clcqpn) { int i; - if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || + if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) || lgr->role != role) return false; @@ -1650,8 +1674,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, if (!smc_link_active(&lgr->lnk[i])) continue; if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && - !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && - !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) + !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) && + (smcr_version == SMC_V2 || + !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN))) return true; } return false; @@ -1690,7 +1715,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], ini->ism_peer_gid[ini->ism_selected]) : - smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && + smcr_lgr_match(lgr, ini->smcr_version, + ini->peer_systemid, + ini->peer_gid, ini->peer_mac, role, + ini->ib_clcqpn)) && !lgr->sync_err && (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 128474f33b8f..dd3198bb6cb9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -340,7 +340,6 @@ struct smc_init_info { u32 rc; u8 negotiated_eid[SMC_MAX_EID_LEN]; /* SMC-R */ - struct smc_clc_msg_local *ib_lcl; u8 smcr_version; u8 check_smcrv2; u8 peer_gid[SMC_GID_SIZE]; From 8ade200c269f8530efde05b616801ed0612d7d72 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:47 +0200 Subject: [PATCH 271/440] net/smc: add v2 format of CLC decline message The CLC decline message changed with SMC-Rv2 and supports up to 4 additional diagnosis codes. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 47 +++++++++++++++++++++++++++++++++++++---------- net/smc/smc_clc.h | 18 ++++++++++++++++++ 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 5cc2e2dc7417..8409ab71a5e4 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -400,6 +400,24 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) return true; } +/* check arriving CLC decline */ +static bool +smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) +{ + struct smc_clc_msg_hdr *hdr = &dclc->hdr; + + if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) + return false; + if (hdr->version == SMC_V1) { + if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) + return false; + } else { + if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2)) + return false; + } + return true; +} + static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) { memset(fce, 0, sizeof(*fce)); @@ -441,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; - if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + if (!smc_clc_msg_decl_valid(dclc)) return false; - trl = &dclc->trl; + check_trl = false; break; default: return false; @@ -742,15 +760,16 @@ out: /* send CLC DECLINE message across internal TCP socket */ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { - struct smc_clc_msg_decline dclc; + struct smc_clc_msg_decline *dclc_v1; + struct smc_clc_msg_decline_v2 dclc; struct msghdr msg; + int len, send_len; struct kvec vec; - int len; + dclc_v1 = (struct smc_clc_msg_decline *)&dclc; memset(&dclc, 0, sizeof(dclc)); memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; - dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = version; dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? @@ -760,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); - memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + if (version == SMC_V1) { + memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + send_len = sizeof(*dclc_v1); + } else { + memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + send_len = sizeof(dclc); + } + dclc.hdr.length = htons(send_len); memset(&msg, 0, sizeof(msg)); vec.iov_base = &dclc; - vec.iov_len = sizeof(struct smc_clc_msg_decline); - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, - sizeof(struct smc_clc_msg_decline)); - if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) + vec.iov_len = send_len; + len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len); + if (len < 0 || len < send_len) len = -EPROTO; return len > 0 ? 0 : len; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 22c079ed77a9..83f02f131fc0 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -281,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ } __aligned(4); +#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */ + +struct smc_clc_msg_decline_v2 { /* clc decline message */ + struct smc_clc_msg_hdr hdr; + u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ + __be32 peer_diagnosis; /* diagnosis information */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 os_type : 4, + reserved : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 4, + os_type : 4; +#endif + u8 reserved2[3]; + __be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2]; + struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ +} __aligned(4); + /* determine start of the prefix area within the proposal message */ static inline struct smc_clc_msg_proposal_prefix * smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) From 24fb68111d4509524b483b2577f1b20a24f5fdfd Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:48 +0200 Subject: [PATCH 272/440] net/smc: retrieve v2 gid from IB device In smc_ib.c, scan for RoCE devices that support UDP encapsulation. Find an eligible device and check that there is a route to the remote peer. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 ++- net/smc/smc_ib.c | 77 ++++++++++++++++++++++++++++++++++++++-------- net/smc/smc_ib.h | 3 +- net/smc/smc_pnet.c | 41 +++++++++++++++--------- 4 files changed, 95 insertions(+), 30 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a081582e5669..6bbd71de6bc0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -715,7 +715,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, - ini->vlan_id, lnk->gid, &lnk->sgid_index); + ini->vlan_id, lnk->gid, &lnk->sgid_index, + lgr->smc_version == SMC_V2 ? + &ini->smcrv2 : NULL); if (rc) goto out; rc = smc_llc_link_init(lnk); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9f72910af1d0..d15bacbd73e0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -62,16 +63,23 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; struct ib_qp_attr qp_attr; + u8 hop_lim = 1; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTR; qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); - rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); + if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) + hop_lim = IPV6_DEFAULT_HOPLIMIT; + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); - memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, - sizeof(lnk->peer_mac)); + if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac, + sizeof(lnk->lgr->nexthop_mac)); + else + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, + sizeof(lnk->peer_mac)); qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming @@ -210,9 +218,54 @@ out: return -ENOENT; } +static int smc_ib_determine_gid_rcu(const struct net_device *ndev, + const struct ib_gid_attr *attr, + u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2) +{ + if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) { + if (gid) + memcpy(gid, &attr->gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = attr->index; + return 0; + } + if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { + struct in_device *in_dev = __in_dev_get_rcu(ndev); + const struct in_ifaddr *ifa; + bool subnet_match = false; + + if (!in_dev) + goto out; + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (!inet_ifa_match(smcrv2->saddr, ifa)) + continue; + subnet_match = true; + break; + } + if (!subnet_match) + goto out; + if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, + smcrv2->daddr, + smcrv2->nexthop_mac, + &smcrv2->uses_gateway)) + goto out; + + if (gid) + memcpy(gid, &attr->gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = attr->index; + return 0; + } +out: + return -ENODEV; +} + /* determine the gid for an ib-device port and vlan id */ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, - unsigned short vlan_id, u8 gid[], u8 *sgid_index) + unsigned short vlan_id, u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2) { const struct ib_gid_attr *attr; const struct net_device *ndev; @@ -228,15 +281,13 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, if (!IS_ERR(ndev) && ((!vlan_id && !is_vlan_dev(ndev)) || (vlan_id && is_vlan_dev(ndev) && - vlan_dev_vlan_id(ndev) == vlan_id)) && - attr->gid_type == IB_GID_TYPE_ROCE) { - rcu_read_unlock(); - if (gid) - memcpy(gid, &attr->gid, SMC_GID_SIZE); - if (sgid_index) - *sgid_index = attr->index; - rdma_put_gid_attr(attr); - return 0; + vlan_dev_vlan_id(ndev) == vlan_id))) { + if (!smc_ib_determine_gid_rcu(ndev, attr, gid, + sgid_index, smcrv2)) { + rcu_read_unlock(); + rdma_put_gid_attr(attr); + return 0; + } } rcu_read_unlock(); rdma_put_gid_attr(attr); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index c55cbd7be67a..07585937370e 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -101,7 +101,8 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, - unsigned short vlan_id, u8 gid[], u8 *sgid_index); + unsigned short vlan_id, u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2); int smc_ib_find_route(__be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 4a964e9190b0..67e9d9fde085 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, return rc; } +static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, + struct smc_init_info *ini) +{ + if (!ini->check_smcrv2 && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL, + NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; + return 0; + } + if (ini->check_smcrv2 && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2, + NULL, &ini->smcrv2)) { + ini->smcrv2.ib_dev_v2 = ibdev; + ini->smcrv2.ib_port_v2 = i; + return 0; + } + return -ENODEV; +} + /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, @@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_ib_device *ibdev; int i; - ini->ib_dev = NULL; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev) @@ -971,12 +990,9 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, continue; if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away) && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, - ini->ib_gid, NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - goto out; + !test_bit(i - 1, ibdev->ports_going_away)) { + if (!smc_pnet_determine_gid(ibdev, i, ini)) + goto out; } } } @@ -1016,12 +1032,9 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away) && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, - ini->ib_gid, NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - break; + !test_bit(i - 1, ibdev->ports_going_away)) { + if (!smc_pnet_determine_gid(ibdev, i, ini)) + break; } } } @@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - ini->ib_dev = NULL; - ini->ib_port = 0; if (!dst) goto out; if (!dst->dev) From 8799e310fb3f15759824a78b6b93d7e6d5def067 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:49 +0200 Subject: [PATCH 273/440] net/smc: add v2 support to the work request layer In the work request layer define one large v2 buffer for each link group that is used to transmit and receive large LLC control messages. Add the completion queue handling for this buffer. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 7 +- net/smc/smc_core.h | 14 ++++ net/smc/smc_ib.c | 3 +- net/smc/smc_wr.c | 194 ++++++++++++++++++++++++++++++++++++++++----- net/smc/smc_wr.h | 2 + 5 files changed, 199 insertions(+), 21 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 6bbd71de6bc0..1ccab993683d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -837,13 +837,17 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); + if (smc_wr_alloc_lgr_mem(lgr)) + goto free_wq; smc_llc_lgr_init(lgr, smc); link_idx = SMC_SINGLE_LINK; lnk = &lgr->lnk[link_idx]; rc = smcr_link_init(lgr, lnk, link_idx, ini); - if (rc) + if (rc) { + smc_wr_free_lgr_mem(lgr); goto free_wq; + } lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; atomic_inc(&lgr_cnt); @@ -1250,6 +1254,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { + smc_wr_free_lgr_mem(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index dd3198bb6cb9..5997657ee9cc 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */ }; #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ +#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */ struct smc_wr_buf { u8 raw[SMC_WR_BUF_SIZE]; }; +struct smc_wr_v2_buf { + u8 raw[SMC_WR_BUF_V2_SIZE]; +}; + #define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ enum smc_wr_reg_state { @@ -92,7 +97,11 @@ struct smc_link { struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ struct completion *wr_tx_compl; /* WR send CQE completion */ /* above four vectors have wr_tx_cnt elements and use the same index */ + struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */ + struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/ + struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ + dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/ atomic_long_t wr_tx_id; /* seq # of last sent WR */ unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ @@ -104,6 +113,7 @@ struct smc_link { struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ /* above three vectors have wr_rx_cnt elements and use the same index */ dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ + dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ @@ -250,6 +260,10 @@ struct smc_link_group { /* client or server */ struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ + struct smc_wr_v2_buf *wr_rx_buf_v2; + /* WR v2 recv payload buffer */ + struct smc_wr_v2_buf *wr_tx_buf_v2; + /* WR v2 send payload buffer */ char peer_systemid[SMC_SYSTEMID_LEN]; /* unique system_id of peer */ struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d15bacbd73e0..32f9d2fdc474 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -601,6 +601,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk) /* create a queue pair within the protection domain for a link */ int smc_ib_create_queue_pair(struct smc_link *lnk) { + int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; struct ib_qp_init_attr qp_attr = { .event_handler = smc_ib_qp_event_handler, .qp_context = lnk, @@ -614,7 +615,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, - .max_recv_sge = 1, + .max_recv_sge = sges_per_buf, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index a419e9af36b9..22d7324969cd 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -101,19 +101,32 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); - if (pnd_snd_idx == link->wr_tx_cnt) - return; - link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; - if (link->wr_tx_pends[pnd_snd_idx].compl_requested) - complete(&link->wr_tx_compl[pnd_snd_idx]); - memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); - /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pnd_snd_idx], 0, - sizeof(link->wr_tx_pends[pnd_snd_idx])); - memset(&link->wr_tx_bufs[pnd_snd_idx], 0, - sizeof(link->wr_tx_bufs[pnd_snd_idx])); - if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) - return; + if (pnd_snd_idx == link->wr_tx_cnt) { + if (link->lgr->smc_version != SMC_V2 || + link->wr_tx_v2_pend->wr_id != wc->wr_id) + return; + link->wr_tx_v2_pend->wc_status = wc->status; + memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd)); + /* clear the full struct smc_wr_tx_pend including .priv */ + memset(link->wr_tx_v2_pend, 0, + sizeof(*link->wr_tx_v2_pend)); + memset(link->lgr->wr_tx_buf_v2, 0, + sizeof(*link->lgr->wr_tx_buf_v2)); + } else { + link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; + if (link->wr_tx_pends[pnd_snd_idx].compl_requested) + complete(&link->wr_tx_compl[pnd_snd_idx]); + memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], + sizeof(pnd_snd)); + /* clear the full struct smc_wr_tx_pend including .priv */ + memset(&link->wr_tx_pends[pnd_snd_idx], 0, + sizeof(link->wr_tx_pends[pnd_snd_idx])); + memset(&link->wr_tx_bufs[pnd_snd_idx], 0, + sizeof(link->wr_tx_bufs[pnd_snd_idx])); + if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) + return; + } + if (wc->status) { for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { /* clear full struct smc_wr_tx_pend including .priv */ @@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) sizeof(link->wr_tx_bufs[i])); clear_bit(i, link->wr_tx_mask); } + if (link->lgr->smc_version == SMC_V2) { + memset(link->wr_tx_v2_pend, 0, + sizeof(*link->wr_tx_v2_pend)); + memset(link->lgr->wr_tx_buf_v2, 0, + sizeof(*link->lgr->wr_tx_buf_v2)); + } /* terminate link */ smcr_link_down_cond_sched(link); } @@ -256,6 +275,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, test_and_clear_bit(idx, link->wr_tx_mask); wake_up(&link->wr_tx_wait); return 1; + } else if (link->lgr->smc_version == SMC_V2 && + pend->idx == link->wr_tx_cnt) { + /* Large v2 buffer */ + memset(&link->wr_tx_v2_pend, 0, + sizeof(link->wr_tx_v2_pend)); + memset(&link->lgr->wr_tx_buf_v2, 0, + sizeof(link->lgr->wr_tx_buf_v2)); + return 1; } return 0; @@ -517,6 +544,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk) { + int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; u32 i; for (i = 0; i < lnk->wr_tx_cnt; i++) { @@ -545,14 +573,44 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } + + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr; + lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE; + lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey; + + lnk->wr_tx_v2_ib->next = NULL; + lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge; + lnk->wr_tx_v2_ib->num_sge = 1; + lnk->wr_tx_v2_ib->opcode = IB_WR_SEND; + lnk->wr_tx_v2_ib->send_flags = + IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + /* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE. + * Each ib_recv_wr gets 2 sges, the second one is a spillover buffer + * and the same buffer for all sges. When a larger message arrived then + * the content of the first small sge is copied to the beginning of + * the larger spillover buffer, allowing easy data mapping. + */ for (i = 0; i < lnk->wr_rx_cnt; i++) { - lnk->wr_rx_sges[i].addr = + int x = i * sges_per_buf; + + lnk->wr_rx_sges[x].addr = lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey; + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_rx_sges[x + 1].addr = + lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x + 1].length = + SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x + 1].lkey = + lnk->roce_pd->local_dma_lkey; + } lnk->wr_rx_ibs[i].next = NULL; - lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; - lnk->wr_rx_ibs[i].num_sge = 1; + lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x]; + lnk->wr_rx_ibs[i].num_sge = sges_per_buf; } lnk->wr_reg.wr.next = NULL; lnk->wr_reg.wr.num_sge = 0; @@ -585,16 +643,45 @@ void smc_wr_free_link(struct smc_link *lnk) DMA_FROM_DEVICE); lnk->wr_rx_dma_addr = 0; } + if (lnk->wr_rx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + lnk->wr_rx_v2_dma_addr = 0; + } if (lnk->wr_tx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); lnk->wr_tx_dma_addr = 0; } + if (lnk->wr_tx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + lnk->wr_tx_v2_dma_addr = 0; + } +} + +void smc_wr_free_lgr_mem(struct smc_link_group *lgr) +{ + if (lgr->smc_version < SMC_V2) + return; + + kfree(lgr->wr_rx_buf_v2); + lgr->wr_rx_buf_v2 = NULL; + kfree(lgr->wr_tx_buf_v2); + lgr->wr_tx_buf_v2 = NULL; } void smc_wr_free_link_mem(struct smc_link *lnk) { + kfree(lnk->wr_tx_v2_ib); + lnk->wr_tx_v2_ib = NULL; + kfree(lnk->wr_tx_v2_sge); + lnk->wr_tx_v2_sge = NULL; + kfree(lnk->wr_tx_v2_pend); + lnk->wr_tx_v2_pend = NULL; kfree(lnk->wr_tx_compl); lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); @@ -619,8 +706,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_rx_bufs = NULL; } +int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) +{ + if (lgr->smc_version < SMC_V2) + return 0; + + lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); + if (!lgr->wr_rx_buf_v2) + return -ENOMEM; + lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); + if (!lgr->wr_tx_buf_v2) { + kfree(lgr->wr_rx_buf_v2); + return -ENOMEM; + } + return 0; +} + int smc_wr_alloc_link_mem(struct smc_link *link) { + int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1; + /* allocate link related memory */ link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) @@ -653,7 +758,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, - sizeof(link->wr_rx_sges[0]), + sizeof(link->wr_rx_sges[0]) * sges_per_buf, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; @@ -672,8 +777,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_compl) goto no_mem_wr_tx_pends; + + if (link->lgr->smc_version == SMC_V2) { + link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib), + GFP_KERNEL); + if (!link->wr_tx_v2_ib) + goto no_mem_tx_compl; + link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge), + GFP_KERNEL); + if (!link->wr_tx_v2_sge) + goto no_mem_v2_ib; + link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend), + GFP_KERNEL); + if (!link->wr_tx_v2_pend) + goto no_mem_v2_sge; + } return 0; +no_mem_v2_sge: + kfree(link->wr_tx_v2_sge); +no_mem_v2_ib: + kfree(link->wr_tx_v2_ib); +no_mem_tx_compl: + kfree(link->wr_tx_compl); no_mem_wr_tx_pends: kfree(link->wr_tx_pends); no_mem_wr_tx_mask: @@ -725,6 +851,24 @@ int smc_wr_create_link(struct smc_link *lnk) rc = -EIO; goto out; } + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev, + lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { + lnk->wr_rx_v2_dma_addr = 0; + rc = -EIO; + goto dma_unmap; + } + lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev, + lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) { + lnk->wr_tx_v2_dma_addr = 0; + rc = -EIO; + goto dma_unmap; + } + } lnk->wr_tx_dma_addr = ib_dma_map_single( ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); @@ -742,6 +886,18 @@ int smc_wr_create_link(struct smc_link *lnk) return rc; dma_unmap: + if (lnk->wr_rx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + lnk->wr_rx_v2_dma_addr = 0; + } + if (lnk->wr_tx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + lnk->wr_tx_v2_dma_addr = 0; + } ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, DMA_FROM_DEVICE); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2bc626f230a5..6d2cdb231859 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link) int smc_wr_create_link(struct smc_link *lnk); int smc_wr_alloc_link_mem(struct smc_link *lnk); +int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr); void smc_wr_free_link(struct smc_link *lnk); void smc_wr_free_link_mem(struct smc_link *lnk); +void smc_wr_free_lgr_mem(struct smc_link_group *lgr); void smc_wr_remember_qp_attr(struct smc_link *lnk); void smc_wr_remove_dev(struct smc_ib_device *smcibdev); void smc_wr_add_dev(struct smc_ib_device *smcibdev); From b4ba4652b3f8b7c9bbb5786f8acf4724bdab2196 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:50 +0200 Subject: [PATCH 274/440] net/smc: extend LLC layer for SMC-Rv2 Add support for large v2 LLC control messages in smc_llc.c. The new large work request buffer allows to combine control messages into one packet that had to be spread over several packets before. Add handling of the new v2 LLC messages. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 8 +- net/smc/smc.h | 20 +- net/smc/smc_core.h | 1 + net/smc/smc_llc.c | 556 ++++++++++++++++++++++++++++++++++++--------- net/smc/smc_llc.h | 12 +- net/smc/smc_wr.c | 43 ++++ net/smc/smc_wr.h | 6 + 7 files changed, 531 insertions(+), 115 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ce5feb68382f..5e50e007a7da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -786,9 +786,9 @@ static int smc_connect_clc(struct smc_sock *smc, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } -static void smc_fill_gid_list(struct smc_link_group *lgr, - struct smc_gidlist *gidlist, - struct smc_ib_device *known_dev, u8 *known_gid) +void smc_fill_gid_list(struct smc_link_group *lgr, + struct smc_gidlist *gidlist, + struct smc_ib_device *known_dev, u8 *known_gid) { struct smc_init_info *alt_ini = NULL; @@ -1435,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); /* initial contact - try to establish second link */ - smc_llc_srv_add_link(link); + smc_llc_srv_add_link(link, NULL); return 0; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 5e7def3ab730..f4286ca1f228 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */ struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ - u8 type; + union { + u8 type; +#if defined(__BIG_ENDIAN_BITFIELD) + struct { + u8 llc_version:4, + llc_type:4; + }; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + struct { + u8 llc_type:4, + llc_version:4; + }; +#endif + }; } __aligned(1); struct smc_cdc_conn_state_flags { @@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc) } #endif +struct smc_gidlist; + struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); +void smc_fill_gid_list(struct smc_link_group *lgr, + struct smc_gidlist *gidlist, + struct smc_ib_device *known_dev, u8 *known_gid); #endif /* __SMC_H */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 5997657ee9cc..59cef3b830d8 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -218,6 +218,7 @@ enum smc_llc_flowtype { SMC_LLC_FLOW_NONE = 0, SMC_LLC_FLOW_ADD_LINK = 2, SMC_LLC_FLOW_DEL_LINK = 4, + SMC_LLC_FLOW_REQ_ADD_LINK = 5, SMC_LLC_FLOW_RKEY = 6, }; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 760f5ed7c8f0..a9623c952007 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -23,16 +23,24 @@ struct smc_llc_hdr { struct smc_wr_rx_hdr common; - u8 length; /* 44 */ -#if defined(__BIG_ENDIAN_BITFIELD) - u8 reserved:4, - add_link_rej_rsn:4; + union { + struct { + u8 length; /* 44 */ + #if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + add_link_rej_rsn:4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 add_link_rej_rsn:4, - reserved:4; + u8 add_link_rej_rsn:4, + reserved:4; #endif + }; + u16 length_v2; /* 44 - 8192*/ + }; u8 flags; -}; +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 @@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt { __be64 rmb_vaddr_new; }; +struct smc_llc_msg_add_link_v2_ext { +#if defined(__BIG_ENDIAN_BITFIELD) + u8 v2_direct : 1, + reserved : 7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 7, + v2_direct : 1; +#endif + u8 reserved2; + u8 client_target_gid[SMC_GID_SIZE]; + u8 reserved3[8]; + u16 num_rkeys; + struct smc_llc_msg_add_link_cont_rt rt[]; +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ + +struct smc_llc_msg_req_add_link_v2 { + struct smc_llc_hdr hd; + u8 reserved[20]; + u8 gid_cnt; + u8 reserved2[3]; + u8 gid[][SMC_GID_SIZE]; +}; + #define SMC_LLC_RKEYS_PER_CONT_MSG 2 struct smc_llc_msg_add_link_cont { /* type 0x03 */ @@ -114,7 +148,8 @@ struct smc_rmb_rtoken { __be64 rmb_vaddr; } __packed; /* format defined in RFC7609 */ -#define SMC_LLC_RKEYS_PER_MSG 3 +#define SMC_LLC_RKEYS_PER_MSG 3 +#define SMC_LLC_RKEYS_PER_MSG_V2 255 struct smc_llc_msg_confirm_rkey { /* type 0x06 */ struct smc_llc_hdr hd; @@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */ u8 reserved2[4]; }; +struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + u8 num_inval_rkeys; + u8 reserved[2]; + __be32 rkey[]; +}; + union smc_llc_msg { struct smc_llc_msg_confirm_link confirm_link; struct smc_llc_msg_add_link add_link; + struct smc_llc_msg_req_add_link_v2 req_add_link; struct smc_llc_msg_add_link_cont add_link_cont; struct smc_llc_msg_del_link delete_link; @@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, struct smc_llc_qentry *qentry) { - u8 msg_type = qentry->msg.raw.hdr.common.type; + u8 msg_type = qentry->msg.raw.hdr.common.llc_type; if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && flow_type != msg_type && !lgr->delayed_event) { @@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, spin_unlock_bh(&lgr->llc_flow_lock); return false; } - switch (qentry->msg.raw.hdr.common.type) { + switch (qentry->msg.raw.hdr.common.llc_type) { case SMC_LLC_ADD_LINK: flow->type = SMC_LLC_FLOW_ADD_LINK; break; @@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, smc_llc_flow_qentry_del(flow); goto out; } - rcv_msg = flow->qentry->msg.raw.hdr.common.type; + rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type; if (exp_msg && rcv_msg != exp_msg) { if (exp_msg == SMC_LLC_ADD_LINK && rcv_msg == SMC_LLC_DELETE_LINK) { @@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link, return 0; } +static int smc_llc_add_pending_send_v2(struct smc_link *link, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **pend) +{ + int rc; + + rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend); + if (rc < 0) + return rc; + return 0; +} + +static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr, + struct smc_link_group *lgr, size_t len) +{ + if (lgr->smc_version == SMC_V2) { + hdr->common.llc_version = SMC_V2; + hdr->length_v2 = len; + } else { + hdr->common.llc_version = 0; + hdr->length = len; + } +} + /* high-level API to send LLC confirm link */ int smc_llc_send_confirm_link(struct smc_link *link, enum smc_llc_reqresp reqresp) @@ -390,8 +458,8 @@ int smc_llc_send_confirm_link(struct smc_link *link, goto put_out; confllc = (struct smc_llc_msg_confirm_link *)wr_buf; memset(confllc, 0, sizeof(*confllc)); - confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; - confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); + confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK; + smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc)); confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; @@ -426,8 +494,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, goto put_out; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); - rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; - rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey); + rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY; + smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc)); rtok_ix = 1; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { @@ -471,8 +539,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, goto put_out; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); - rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; - rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); + rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY; + smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc)); rkeyllc->num_rkeys = 1; rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); /* send llc message */ @@ -482,26 +550,116 @@ put_out: return rc; } +/* return first buffer from any of the next buf lists */ +static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr, + int *buf_lst) +{ + struct smc_buf_desc *buf_pos; + + while (*buf_lst < SMC_RMBE_SIZES) { + buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst], + struct smc_buf_desc, list); + if (buf_pos) + return buf_pos; + (*buf_lst)++; + } + return NULL; +} + +/* return next rmb from buffer lists */ +static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, + int *buf_lst, + struct smc_buf_desc *buf_pos) +{ + struct smc_buf_desc *buf_next; + + if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { + (*buf_lst)++; + return _smc_llc_get_next_rmb(lgr, buf_lst); + } + buf_next = list_next_entry(buf_pos, list); + return buf_next; +} + +static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr, + int *buf_lst) +{ + *buf_lst = 0; + return smc_llc_get_next_rmb(lgr, buf_lst, NULL); +} + +static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext, + struct smc_link *link, struct smc_link *link_new) +{ + struct smc_link_group *lgr = link->lgr; + struct smc_buf_desc *buf_pos; + int prim_lnk_idx, lnk_idx, i; + struct smc_buf_desc *rmb; + int len = sizeof(*ext); + int buf_lst; + + ext->v2_direct = !lgr->uses_gateway; + memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE); + + prim_lnk_idx = link->link_idx; + lnk_idx = link_new->link_idx; + mutex_lock(&lgr->rmbs_lock); + ext->num_rkeys = lgr->conns_num; + if (!ext->num_rkeys) + goto out; + buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst); + for (i = 0; i < ext->num_rkeys; i++) { + if (!buf_pos) + break; + rmb = buf_pos; + ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey); + ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey); + ext->rt[i].rmb_vaddr_new = + cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); + buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); + while (buf_pos && !(buf_pos)->used) + buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); + } + len += i * sizeof(ext->rt[0]); +out: + mutex_unlock(&lgr->rmbs_lock); + return len; +} + /* send ADD LINK request or response */ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], struct smc_link *link_new, enum smc_llc_reqresp reqresp) { + struct smc_llc_msg_add_link_v2_ext *ext = NULL; struct smc_llc_msg_add_link *addllc; struct smc_wr_tx_pend_priv *pend; - struct smc_wr_buf *wr_buf; + int len = sizeof(*addllc); int rc; if (!smc_wr_tx_link_hold(link)) return -ENOLINK; - rc = smc_llc_add_pending_send(link, &wr_buf, &pend); - if (rc) - goto put_out; - addllc = (struct smc_llc_msg_add_link *)wr_buf; + if (link->lgr->smc_version == SMC_V2) { + struct smc_wr_v2_buf *wr_buf; + + rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend); + if (rc) + goto put_out; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + ext = (struct smc_llc_msg_add_link_v2_ext *) + &wr_buf->raw[sizeof(*addllc)]; + memset(ext, 0, SMC_WR_TX_SIZE); + } else { + struct smc_wr_buf *wr_buf; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + goto put_out; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + } memset(addllc, 0, sizeof(*addllc)); - addllc->hd.common.type = SMC_LLC_ADD_LINK; - addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + addllc->hd.common.llc_type = SMC_LLC_ADD_LINK; if (reqresp == SMC_LLC_RESP) addllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(addllc->sender_mac, mac, ETH_ALEN); @@ -516,8 +674,14 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], addllc->qp_mtu = min(link_new->path_mtu, link_new->peer_mtu); } + if (ext && link_new) + len += smc_llc_fill_ext_v2(ext, link, link_new); + smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len); /* send llc message */ - rc = smc_wr_tx_send(link, pend); + if (link->lgr->smc_version == SMC_V2) + rc = smc_wr_tx_v2_send(link, pend, len); + else + rc = smc_wr_tx_send(link, pend); put_out: smc_wr_tx_link_put(link); return rc; @@ -541,8 +705,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, delllc = (struct smc_llc_msg_del_link *)wr_buf; memset(delllc, 0, sizeof(*delllc)); - delllc->hd.common.type = SMC_LLC_DELETE_LINK; - delllc->hd.length = sizeof(struct smc_llc_msg_del_link); + delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc)); if (reqresp == SMC_LLC_RESP) delllc->hd.flags |= SMC_LLC_FLAG_RESP; if (orderly) @@ -574,8 +738,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) goto put_out; testllc = (struct smc_llc_msg_test_link *)wr_buf; memset(testllc, 0, sizeof(*testllc)); - testllc->hd.common.type = SMC_LLC_TEST_LINK; - testllc->hd.length = sizeof(struct smc_llc_msg_test_link); + testllc->hd.common.llc_type = SMC_LLC_TEST_LINK; + smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc)); memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); /* send llc message */ rc = smc_wr_tx_send(link, pend); @@ -651,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, return -EMLINK; } -/* return first buffer from any of the next buf lists */ -static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr, - int *buf_lst) -{ - struct smc_buf_desc *buf_pos; - - while (*buf_lst < SMC_RMBE_SIZES) { - buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst], - struct smc_buf_desc, list); - if (buf_pos) - return buf_pos; - (*buf_lst)++; - } - return NULL; -} - -/* return next rmb from buffer lists */ -static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, - int *buf_lst, - struct smc_buf_desc *buf_pos) -{ - struct smc_buf_desc *buf_next; - - if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { - (*buf_lst)++; - return _smc_llc_get_next_rmb(lgr, buf_lst); - } - buf_next = list_next_entry(buf_pos, list); - return buf_next; -} - -static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr, - int *buf_lst) -{ - *buf_lst = 0; - return smc_llc_get_next_rmb(lgr, buf_lst, NULL); -} - /* send one add_link_continue msg */ static int smc_llc_add_link_cont(struct smc_link *link, struct smc_link *link_new, u8 *num_rkeys_todo, @@ -734,7 +860,7 @@ static int smc_llc_add_link_cont(struct smc_link *link, while (*buf_pos && !(*buf_pos)->used) *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); } - addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT; + addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT; addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); if (lgr->role == SMC_CLNT) addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; @@ -793,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry) qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; + smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr, + sizeof(qentry->msg)); return smc_llc_send_message(qentry->link, &qentry->msg); } @@ -813,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link, SMC_LLC_DEL_LOST_PATH); return -ENOLINK; } - if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { + if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) { /* received DELETE_LINK instead */ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); @@ -854,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link, return 0; } +static void smc_llc_save_add_link_rkeys(struct smc_link *link, + struct smc_link *link_new) +{ + struct smc_llc_msg_add_link_v2_ext *ext; + struct smc_link_group *lgr = link->lgr; + int max, i; + + ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 + + SMC_WR_TX_SIZE); + max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); + mutex_lock(&lgr->rmbs_lock); + for (i = 0; i < max; i++) { + smc_rtoken_set(lgr, link->link_idx, link_new->link_idx, + ext->rt[i].rmb_key, + ext->rt[i].rmb_vaddr_new, + ext->rt[i].rmb_key_new); + } + mutex_unlock(&lgr->rmbs_lock); +} + static void smc_llc_save_add_link_info(struct smc_link *link, struct smc_llc_msg_add_link *add_llc) { @@ -884,14 +1032,24 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) } ini->vlan_id = lgr->vlan_id; + if (lgr->smc_version == SMC_V2) { + ini->check_smcrv2 = true; + ini->smcrv2.saddr = lgr->saddr; + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid); + } smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && - !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) { - if (!ini->ib_dev) + (lgr->smc_version == SMC_V2 || + !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) { + if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2) goto out_reject; lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; } - if (!ini->ib_dev) { + if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) { + lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; + ini->smcrv2.ib_dev_v2 = link->smcibdev; + ini->smcrv2.ib_port_v2 = link->ibport; + } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; ini->ib_dev = link->smcibdev; ini->ib_port = link->ibport; @@ -916,14 +1074,18 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) goto out_clear_lnk; rc = smc_llc_send_add_link(link, - lnk_new->smcibdev->mac[ini->ib_port - 1], + lnk_new->smcibdev->mac[lnk_new->ibport - 1], lnk_new->gid, lnk_new, SMC_LLC_RESP); if (rc) goto out_clear_lnk; - rc = smc_llc_cli_rkey_exchange(link, lnk_new); - if (rc) { - rc = 0; - goto out_clear_lnk; + if (lgr->smc_version == SMC_V2) { + smc_llc_save_add_link_rkeys(link, lnk_new); + } else { + rc = smc_llc_cli_rkey_exchange(link, lnk_new); + if (rc) { + rc = 0; + goto out_clear_lnk; + } } rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t); if (!rc) @@ -939,6 +1101,44 @@ out: return rc; } +static void smc_llc_send_request_add_link(struct smc_link *link) +{ + struct smc_llc_msg_req_add_link_v2 *llc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_v2_buf *wr_buf; + struct smc_gidlist gidlist; + int rc, len, i; + + if (!smc_wr_tx_link_hold(link)) + return; + if (link->lgr->type == SMC_LGR_SYMMETRIC || + link->lgr->type == SMC_LGR_ASYMMETRIC_PEER) + goto put_out; + + smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid); + if (gidlist.len <= 1) + goto put_out; + + rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend); + if (rc) + goto put_out; + llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf; + memset(llc, 0, SMC_WR_TX_SIZE); + + llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK; + for (i = 0; i < gidlist.len; i++) + memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0])); + llc->gid_cnt = gidlist.len; + len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0])); + smc_llc_init_msg_hdr(&llc->hd, link->lgr, len); + rc = smc_wr_tx_v2_send(link, pend, len); + if (!rc) + /* set REQ_ADD_LINK flow and wait for response from peer */ + link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK; +put_out: + smc_wr_tx_link_put(link); +} + /* as an SMC client, invite server to start the add_link processing */ static void smc_llc_cli_add_link_invite(struct smc_link *link, struct smc_llc_qentry *qentry) @@ -946,6 +1146,11 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link, struct smc_link_group *lgr = smc_get_lgr(link); struct smc_init_info *ini = NULL; + if (lgr->smc_version == SMC_V2) { + smc_llc_send_request_add_link(link); + goto out; + } + if (lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) goto out; @@ -978,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc) static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) { - if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && + if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK && smc_llc_is_empty_llc_message(llc)) return true; return false; @@ -1145,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link, /* receive CONFIRM LINK response over the RoCE fabric */ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); if (!qentry || - qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { + qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) { /* send DELETE LINK */ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, false, SMC_LLC_DEL_LOST_PATH); @@ -1164,24 +1369,55 @@ static int smc_llc_srv_conf_link(struct smc_link *link, return 0; } -int smc_llc_srv_add_link(struct smc_link *link) +static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry) +{ + qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; + smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr, + sizeof(qentry->msg)); + memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data)); + smc_llc_send_message(qentry->link, &qentry->msg); +} + +int smc_llc_srv_add_link(struct smc_link *link, + struct smc_llc_qentry *req_qentry) { enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; struct smc_link_group *lgr = link->lgr; struct smc_llc_msg_add_link *add_llc; struct smc_llc_qentry *qentry = NULL; + bool send_req_add_link_resp = false; struct smc_link *link_new = NULL; - struct smc_init_info *ini; + struct smc_init_info *ini = NULL; int lnk_idx, rc = 0; + if (req_qentry && + req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK) + send_req_add_link_resp = true; + ini = kzalloc(sizeof(*ini), GFP_KERNEL); - if (!ini) - return -ENOMEM; + if (!ini) { + rc = -ENOMEM; + goto out; + } /* ignore client add link recommendation, start new flow */ ini->vlan_id = lgr->vlan_id; + if (lgr->smc_version == SMC_V2) { + ini->check_smcrv2 = true; + ini->smcrv2.saddr = lgr->saddr; + if (send_req_add_link_resp) { + struct smc_llc_msg_req_add_link_v2 *req_add = + &req_qentry->msg.req_add_link; + + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]); + } + } smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); - if (!ini->ib_dev) { + if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) { + lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; + ini->smcrv2.ib_dev_v2 = link->smcibdev; + ini->smcrv2.ib_port_v2 = link->ibport; + } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; ini->ib_dev = link->smcibdev; ini->ib_port = link->ibport; @@ -1196,11 +1432,17 @@ int smc_llc_srv_add_link(struct smc_link *link) if (rc) goto out; link_new = &lgr->lnk[lnk_idx]; + + rc = smcr_buf_map_lgr(link_new); + if (rc) + goto out_err; + rc = smc_llc_send_add_link(link, - link_new->smcibdev->mac[ini->ib_port - 1], + link_new->smcibdev->mac[link_new->ibport-1], link_new->gid, link_new, SMC_LLC_REQ); if (rc) goto out_err; + send_req_add_link_resp = false; /* receive ADD LINK response over the RoCE fabric */ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK); if (!qentry) { @@ -1215,24 +1457,26 @@ int smc_llc_srv_add_link(struct smc_link *link) } if (lgr->type == SMC_LGR_SINGLE && (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && - !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) { + (lgr->smc_version == SMC_V2 || + !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) { lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; } smc_llc_save_add_link_info(link_new, add_llc); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); rc = smc_ib_ready_link(link_new); - if (rc) - goto out_err; - rc = smcr_buf_map_lgr(link_new); if (rc) goto out_err; rc = smcr_buf_reg_lgr(link_new); if (rc) goto out_err; - rc = smc_llc_srv_rkey_exchange(link, link_new); - if (rc) - goto out_err; + if (lgr->smc_version == SMC_V2) { + smc_llc_save_add_link_rkeys(link, link_new); + } else { + rc = smc_llc_srv_rkey_exchange(link, link_new); + if (rc) + goto out_err; + } rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); if (rc) goto out_err; @@ -1245,23 +1489,27 @@ out_err: } out: kfree(ini); + if (send_req_add_link_resp) + smc_llc_send_req_add_link_response(req_qentry); return rc; } static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) { struct smc_link *link = lgr->llc_flow_lcl.qentry->link; + struct smc_llc_qentry *qentry; int rc; - smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); + qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); mutex_lock(&lgr->llc_conf_mutex); - rc = smc_llc_srv_add_link(link); + rc = smc_llc_srv_add_link(link, qentry); if (!rc && lgr->type == SMC_LGR_SYMMETRIC) { /* delete any asymmetric link */ smc_llc_delete_asym_link(lgr); } mutex_unlock(&lgr->llc_conf_mutex); + kfree(qentry); } /* enqueue a local add_link req to trigger a new add_link flow */ @@ -1269,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link) { struct smc_llc_msg_add_link add_llc = {}; - add_llc.hd.length = sizeof(add_llc); - add_llc.hd.common.type = SMC_LLC_ADD_LINK; + add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK; + smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc)); /* no dev and port needed */ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); } @@ -1292,7 +1540,8 @@ static void smc_llc_add_link_work(struct work_struct *work) else smc_llc_process_srv_add_link(lgr); out: - smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); + if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK) + smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); } /* enqueue a local del_link msg to trigger a new del_link flow, @@ -1302,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) { struct smc_llc_msg_del_link del_llc = {}; - del_llc.hd.length = sizeof(del_llc); - del_llc.hd.common.type = SMC_LLC_DELETE_LINK; + del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc)); del_llc.link_num = del_link_id; del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH); del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; @@ -1373,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) struct smc_llc_msg_del_link delllc = {}; int i; - delllc.hd.common.type = SMC_LLC_DELETE_LINK; - delllc.hd.length = sizeof(delllc); + delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc)); if (ord) delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; @@ -1490,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) link = qentry->link; num_entries = llc->rtoken[0].num_rkeys; + if (num_entries > SMC_LLC_RKEYS_PER_MSG) + goto out_err; /* first rkey entry is for receiving link */ rk_idx = smc_rtoken_add(link, llc->rtoken[0].rmb_vaddr, @@ -1508,6 +1759,7 @@ out_err: llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY; out: llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc)); smc_llc_send_message(link, &qentry->msg); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); } @@ -1525,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) llc = &qentry->msg.delete_rkey; link = qentry->link; + if (lgr->smc_version == SMC_V2) { + struct smc_llc_msg_delete_rkey_v2 *llcv2; + + memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; + llcv2->num_inval_rkeys = 0; + + max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(link, llcv2->rkey[i])) + llcv2->num_inval_rkeys++; + } + memset(&llc->rkey[0], 0, sizeof(llc->rkey)); + memset(&llc->reserved2, 0, sizeof(llc->reserved2)); + smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc)); + if (llcv2->num_inval_rkeys) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = llcv2->num_inval_rkeys; + } + goto finish; + } + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { if (smc_rtoken_delete(link, llc->rkey[i])) @@ -1534,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; llc->err_mask = err_mask; } +finish: llc->hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); @@ -1569,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (!smc_link_usable(link)) goto out; - switch (llc->raw.hdr.common.type) { + switch (llc->raw.hdr.common.llc_type) { case SMC_LLC_TEST_LINK: llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, llc); @@ -1594,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { + return; + } + if (lgr->llc_flow_lcl.type == + SMC_LLC_FLOW_REQ_ADD_LINK) { + /* server started add_link processing */ + lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK; + smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, + qentry); + schedule_work(&lgr->llc_add_link_work); + return; + } + if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_add_link_work); } } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { @@ -1643,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; + case SMC_LLC_REQ_ADD_LINK: + /* handle response here, smc_llc_flow_stop() cannot be called + * in tasklet context + */ + if (lgr->role == SMC_CLNT && + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK && + (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) { + smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl); + } else if (lgr->role == SMC_SERV) { + if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { + /* as smc server, handle client suggestion */ + lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK; + schedule_work(&lgr->llc_add_link_work); + } + return; + } + break; default: smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); break; @@ -1686,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link, { enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; - u8 llc_type = qentry->msg.raw.hdr.common.type; + u8 llc_type = qentry->msg.raw.hdr.common.llc_type; switch (llc_type) { case SMC_LLC_TEST_LINK: @@ -1712,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link, /* not used because max links is 3 */ break; default: - smc_llc_protocol_violation(link->lgr, llc_type); + smc_llc_protocol_violation(link->lgr, + qentry->msg.raw.hdr.common.type); break; } kfree(qentry); @@ -1737,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); /* process responses immediately */ - if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { + if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) && + llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) { smc_llc_rx_response(link, qentry); return; } @@ -1757,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) if (wc->byte_len < sizeof(*llc)) return; /* short message */ - if (llc->raw.hdr.length != sizeof(*llc)) - return; /* invalid message */ + if (!llc->raw.hdr.common.llc_version) { + if (llc->raw.hdr.length != sizeof(*llc)) + return; /* invalid message */ + } else { + if (llc->raw.hdr.length_v2 < sizeof(*llc)) + return; /* invalid message */ + } smc_llc_enqueue(link, llc); } @@ -1977,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY }, + /* V2 types */ + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_TEST_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ADD_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_REQ_ADD_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_RKEY_V2 + }, { .handler = NULL, } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index cc00a2ec4e92..4404e52b3346 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -30,10 +30,19 @@ enum smc_llc_msg_type { SMC_LLC_ADD_LINK = 0x02, SMC_LLC_ADD_LINK_CONT = 0x03, SMC_LLC_DELETE_LINK = 0x04, + SMC_LLC_REQ_ADD_LINK = 0x05, SMC_LLC_CONFIRM_RKEY = 0x06, SMC_LLC_TEST_LINK = 0x07, SMC_LLC_CONFIRM_RKEY_CONT = 0x08, SMC_LLC_DELETE_RKEY = 0x09, + /* V2 types */ + SMC_LLC_CONFIRM_LINK_V2 = 0x21, + SMC_LLC_ADD_LINK_V2 = 0x22, + SMC_LLC_DELETE_LINK_V2 = 0x24, + SMC_LLC_REQ_ADD_LINK_V2 = 0x25, + SMC_LLC_CONFIRM_RKEY_V2 = 0x26, + SMC_LLC_TEST_LINK_V2 = 0x27, + SMC_LLC_DELETE_RKEY_V2 = 0x29, }; #define smc_link_downing(state) \ @@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); -int smc_llc_srv_add_link(struct smc_link *link); +int smc_llc_srv_add_link(struct smc_link *link, + struct smc_llc_qentry *req_qentry); void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 22d7324969cd..600ab5889227 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -258,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, return 0; } +int smc_wr_tx_get_v2_slot(struct smc_link *link, + smc_wr_tx_handler handler, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **wr_pend_priv) +{ + struct smc_wr_tx_pend *wr_pend; + struct ib_send_wr *wr_ib; + u64 wr_id; + + if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt) + return -EBUSY; + + *wr_buf = NULL; + *wr_pend_priv = NULL; + wr_id = smc_wr_tx_get_next_wr_id(link); + wr_pend = link->wr_tx_v2_pend; + wr_pend->wr_id = wr_id; + wr_pend->handler = handler; + wr_pend->link = link; + wr_pend->idx = link->wr_tx_cnt; + wr_ib = link->wr_tx_v2_ib; + wr_ib->wr_id = wr_id; + *wr_buf = link->lgr->wr_tx_buf_v2; + *wr_pend_priv = &wr_pend->priv; + return 0; +} + int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv) { @@ -307,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + int len) +{ + int rc; + + link->wr_tx_v2_ib->sg_list[0].length = len; + ib_req_notify_cq(link->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL); + if (rc) { + smc_wr_tx_put_slot(link, priv); + smcr_link_down_cond_sched(link); + } + return rc; +} + /* Send prepared WR slot via ib_post_send and wait for send completion * notification. * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 6d2cdb231859..f353311e6f84 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -113,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); +int smc_wr_tx_get_v2_slot(struct smc_link *link, + smc_wr_tx_handler handler, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); +int smc_wr_tx_v2_send(struct smc_link *link, + struct smc_wr_tx_pend_priv *priv, int len); int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); From b0539f5eddc2eefd24378bda3ee9cbbca916f58d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:51 +0200 Subject: [PATCH 275/440] net/smc: add netlink support for SMC-Rv2 Implement the netlink support for SMC-Rv2 related attributes that are provided to user space. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 17 ++++++- net/smc/smc_core.c | 98 ++++++++++++++++++++++++++++++---------- 2 files changed, 88 insertions(+), 27 deletions(-) diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index b175bd0165a1..20f33b27787f 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -84,17 +84,28 @@ enum { SMC_NLA_SYS_IS_ISM_V2, /* u8 */ SMC_NLA_SYS_LOCAL_HOST, /* string */ SMC_NLA_SYS_SEID, /* string */ + SMC_NLA_SYS_IS_SMCR_V2, /* u8 */ __SMC_NLA_SYS_MAX, SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1 }; -/* SMC_NLA_LGR_V2 nested attributes */ +/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */ enum { SMC_NLA_LGR_V2_VER, /* u8 */ SMC_NLA_LGR_V2_REL, /* u8 */ SMC_NLA_LGR_V2_OS, /* u8 */ SMC_NLA_LGR_V2_NEG_EID, /* string */ SMC_NLA_LGR_V2_PEER_HOST, /* string */ + __SMC_NLA_LGR_V2_MAX, + SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1 +}; + +/* SMC_NLA_LGR_R_V2 nested attributes */ +enum { + SMC_NLA_LGR_R_V2_UNSPEC, + SMC_NLA_LGR_R_V2_DIRECT, /* u8 */ + __SMC_NLA_LGR_R_V2_MAX, + SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1 }; /* SMC_GEN_LGR_SMCR attributes */ @@ -106,6 +117,8 @@ enum { SMC_NLA_LGR_R_PNETID, /* string */ SMC_NLA_LGR_R_VLAN_ID, /* u8 */ SMC_NLA_LGR_R_CONNS_NUM, /* u32 */ + SMC_NLA_LGR_R_V2_COMMON, /* nest */ + SMC_NLA_LGR_R_V2, /* nest */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; @@ -138,7 +151,7 @@ enum { SMC_NLA_LGR_D_PNETID, /* string */ SMC_NLA_LGR_D_CHID, /* u16 */ SMC_NLA_LGR_D_PAD, /* flag */ - SMC_NLA_LGR_V2, /* nest */ + SMC_NLA_LGR_D_V2_COMMON, /* nest */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 1ccab993683d..8e642f8f334f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -244,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable())) goto errattr; + if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true)) + goto errattr; smc_clc_get_hostname(&host); if (host) { memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); @@ -271,12 +273,65 @@ errmsg: return skb->len; } +/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */ +static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb, + struct nlattr *v2_attrs) +{ + char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; + char smc_eid[SMC_MAX_EID_LEN + 1]; + + if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) + goto errv2attr; + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) + goto errv2attr; + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); + return 0; + +errv2attr: + nla_nest_cancel(skb, v2_attrs); + return -EMSGSIZE; +} + +static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *v2_attrs; + + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2); + if (!v2_attrs) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); + return 0; + +errv2attr: + nla_nest_cancel(skb, v2_attrs); +errattr: + return -EMSGSIZE; +} + static int smc_nl_fill_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { char smc_target[SMC_MAX_PNETID_LEN + 1]; - struct nlattr *attrs; + struct nlattr *attrs, *v2_attrs; attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR); if (!attrs) @@ -296,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; + if (lgr->smc_version > SMC_V1) { + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON); + if (!v2_attrs) + goto errattr; + if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) + goto errattr; + if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb)) + goto errattr; + } nla_nest_end(skb, attrs); return 0; @@ -428,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; char smc_pnet[SMC_MAX_PNETID_LEN + 1]; - char smc_eid[SMC_MAX_EID_LEN + 1]; - struct nlattr *v2_attrs; struct nlattr *attrs; void *nlh; @@ -463,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; + if (lgr->smc_version > SMC_V1) { + struct nlattr *v2_attrs; - v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2); - if (!v2_attrs) - goto errattr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) - goto errv2attr; - memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); - smc_host[SMC_MAX_HOSTNAME_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) - goto errv2attr; - memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); - smc_eid[SMC_MAX_EID_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) - goto errv2attr; - - nla_nest_end(skb, v2_attrs); + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON); + if (!v2_attrs) + goto errattr; + if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) + goto errattr; + } nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); return 0; -errv2attr: - nla_nest_cancel(skb, v2_attrs); errattr: nla_nest_cancel(skb, attrs); errout: From 29397e34c76b57ce596493008176cf374f1cb867 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 16 Oct 2021 11:37:52 +0200 Subject: [PATCH 276/440] net/smc: stop links when their GID is removed With SMC-Rv2 the GID is an IP address that can be deleted from the device. When an IB_EVENT_GID_CHANGE event is provided then iterate over all active links and check if their GID is still defined. Otherwise stop the affected link. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 32f9d2fdc474..d93055ec17ae 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -295,6 +295,58 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, return -ENODEV; } +/* check if gid is still defined on smcibdev */ +static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2, + struct smc_ib_device *smcibdev, u8 ibport) +{ + const struct ib_gid_attr *attr; + bool rc = false; + int i; + + for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { + attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); + if (IS_ERR(attr)) + continue; + + rcu_read_lock(); + if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) || + (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + !(ipv6_addr_type((const struct in6_addr *)&attr->gid) + & IPV6_ADDR_LINKLOCAL))) + if (!memcmp(gid, &attr->gid, SMC_GID_SIZE)) + rc = true; + rcu_read_unlock(); + rdma_put_gid_attr(attr); + } + return rc; +} + +/* check all links if the gid is still defined on smcibdev */ +static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr; + int i; + + spin_lock_bh(&smc_lgr_list.lock); + list_for_each_entry(lgr, &smc_lgr_list.list, list) { + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, + SMC_MAX_PNETID_LEN)) + continue; /* lgr is not affected */ + if (list_empty(&lgr->list)) + continue; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_UNUSED || + lgr->lnk[i].smcibdev != smcibdev) + continue; + if (!smc_ib_check_link_gid(lgr->lnk[i].gid, + lgr->smc_version == SMC_V2, + smcibdev, ibport)) + smcr_port_err(smcibdev, ibport); + } + } + spin_unlock_bh(&smc_lgr_list.lock); +} + static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) { int rc; @@ -333,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work) } else { clear_bit(port_idx, smcibdev->ports_going_away); smcr_port_add(smcibdev, port_idx + 1); + smc_ib_gid_check(smcibdev, port_idx + 1); } } } From 254ec036db1123b10e23e1412c191a3cf70dce71 Mon Sep 17 00:00:00 2001 From: Kyungrok Chung Date: Sat, 16 Oct 2021 20:21:36 +0900 Subject: [PATCH 277/440] net: make use of helper netif_is_bridge_master() Make use of netdev helper functions to improve code readability. Replace 'dev->priv_flags & IFF_EBRIDGE' with netif_is_bridge_master(dev). Signed-off-by: Kyungrok Chung Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/batman-adv/multicast.c | 2 +- net/bridge/br.c | 4 ++-- net/bridge/br_fdb.c | 6 +++--- net/bridge/br_if.c | 2 +- net/bridge/br_ioctl.c | 2 +- net/bridge/br_mdb.c | 4 ++-- net/bridge/br_netfilter_hooks.c | 2 +- net/bridge/br_netlink.c | 4 ++-- net/core/rtnetlink.c | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a3b6658ed789..433901dcf0c3 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -89,7 +89,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) rcu_read_lock(); do { upper = netdev_master_upper_dev_get_rcu(upper); - } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); + } while (upper && !netif_is_bridge_master(upper)); dev_hold(upper); rcu_read_unlock(); diff --git a/net/bridge/br.c b/net/bridge/br.c index d3a32c6813e0..1fac72cc617f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -36,7 +36,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v bool changed_addr; int err; - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { err = br_vlan_bridge_event(dev, event, ptr); if (err) return notifier_from_errno(err); @@ -349,7 +349,7 @@ static void __net_exit br_net_exit(struct net *net) rtnl_lock(); for_each_netdev(net, dev) - if (dev->priv_flags & IFF_EBRIDGE) + if (netif_is_bridge_master(dev)) br_dev_delete(dev, &list); unregister_netdevice_many(&list); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 46812b659710..a6a68e18c70a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -825,7 +825,7 @@ int br_fdb_dump(struct sk_buff *skb, struct net_bridge_fdb_entry *f; int err = 0; - if (!(dev->priv_flags & IFF_EBRIDGE)) + if (!netif_is_bridge_master(dev)) return err; if (!filter_dev) { @@ -1076,7 +1076,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { @@ -1173,7 +1173,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge *br; int err; - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4a02f8bb278a..c11bba3e7ec0 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -471,7 +471,7 @@ int br_del_bridge(struct net *net, const char *name) if (dev == NULL) ret = -ENXIO; /* Could not find device */ - else if (!(dev->priv_flags & IFF_EBRIDGE)) { + else if (!netif_is_bridge_master(dev)) { /* Attempt to delete non bridge device! */ ret = -EPERM; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 49c268871fc1..db4ab2c2ce18 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -26,7 +26,7 @@ static int get_bridge_ifindices(struct net *net, int *indices, int num) for_each_netdev_rcu(net, dev) { if (i >= num) break; - if (dev->priv_flags & IFF_EBRIDGE) + if (netif_is_bridge_master(dev)) indices[i++] = dev->ifindex; } rcu_read_unlock(); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 0281453f7766..61ccf46fcc21 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -422,7 +422,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; @@ -1016,7 +1016,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (!(dev->priv_flags & IFF_EBRIDGE)) { + if (!netif_is_bridge_master(dev)) { NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); return -EOPNOTSUPP; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8edfb98ae1d5..b5af68c105a8 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -968,7 +968,7 @@ static int brnf_device_event(struct notifier_block *unused, unsigned long event, struct net *net; int ret; - if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE)) + if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) return NOTIFY_DONE; ASSERT_RTNL(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5c6c4305ed23..0c8b5f1a15bc 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -106,7 +106,7 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, p = br_port_get_check_rcu(dev); if (p) vg = nbp_vlan_group_rcu(p); - } else if (dev->priv_flags & IFF_EBRIDGE) { + } else if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group_rcu(br); } @@ -1050,7 +1050,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) p = br_port_get_rtnl(dev); /* We want to accept dev as bridge itself as well */ - if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + if (!p && !netif_is_bridge_master(dev)) return -EINVAL; err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2dc1b209ba91..564d24c451af 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4384,7 +4384,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) + !netif_is_bridge_master(dev)) continue; cops = ops; } From ffdd33dd9c12a8c263f78d778066709ef94671f9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 16 Oct 2021 10:13:27 +0200 Subject: [PATCH 278/440] netfilter: core: Fix clang warnings about unused static inlines Unlike gcc, clang warns about unused static inlines that are not in an include file: net/netfilter/core.c:344:20: error: unused function 'nf_ingress_hook' [-Werror,-Wunused-function] static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) ^ net/netfilter/core.c:353:20: error: unused function 'nf_egress_hook' [-Werror,-Wunused-function] static inline bool nf_egress_hook(const struct nf_hook_ops *reg, int pf) ^ According to commit 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build"), the proper resolution is to mark the affected functions as __maybe_unused. An alternative approach would be to move them to include/linux/netfilter_netdev.h, but since Pablo didn't do that in commit ddcfa710d40b ("netfilter: add nf_ingress_hook() helper function"), I'm guessing __maybe_unused is preferred. This fixes both the warning introduced by Pablo in v5.10 as well as the one recently introduced by myself with commit 42df6e1d221d ("netfilter: Introduce egress hook"). Fixes: ddcfa710d40b ("netfilter: add nf_ingress_hook() helper function") Reported-by: kernel test robot Signed-off-by: Lukas Wunner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3a32a813fcde..6dec9cd395f1 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -341,7 +341,8 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg, return 0; } -static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) +static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, + int pf) { if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) @@ -350,7 +351,8 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) return false; } -static inline bool nf_egress_hook(const struct nf_hook_ops *reg, int pf) +static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg, + int pf) { return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; } From fbf307c89eb08c51da4dd039f68c19afbcf5949d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 16 Oct 2021 10:49:02 +0200 Subject: [PATCH 279/440] gen_stats: Add instead Set the value in __gnet_stats_copy_basic(). __gnet_stats_copy_basic() always assigns the value to the bstats argument overwriting the previous value. The later added per-CPU version always accumulated the values in the returning gnet_stats_basic_packed argument. Based on review there are five users of that function as of today: - est_fetch_counters(), ___gnet_stats_copy_basic() memsets() bstats to zero, single invocation. - mq_dump(), mqprio_dump(), mqprio_dump_class_stats() memsets() bstats to zero, multiple invocation but does not use the function due to !qdisc_is_percpu_stats(). Add the values in __gnet_stats_copy_basic() instead overwriting. Rename the function to gnet_stats_add_basic() to make it more obvious. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/net/gen_stats.h | 8 ++++---- net/core/gen_estimator.c | 2 +- net/core/gen_stats.c | 29 ++++++++++++++++------------- net/sched/sch_mq.c | 5 ++--- net/sched/sch_mqprio.c | 11 +++++------ 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 1424e02cef90..25740d004bdb 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -46,10 +46,10 @@ int gnet_stats_copy_basic(const seqcount_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); +void gnet_stats_add_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b); int gnet_stats_copy_basic_hw(const seqcount_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 8e582e29a41e..205df8b5116e 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ static void est_fetch_counters(struct net_rate_estimator *e, if (e->stats_lock) spin_lock(e->stats_lock); - __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); + gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats); if (e->stats_lock) spin_unlock(e->stats_lock); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index e491b083b348..25d7c0989b83 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -114,9 +114,8 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, } EXPORT_SYMBOL(gnet_stats_start_copy); -static void -__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu) +static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu) { int i; @@ -136,26 +135,30 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, } } -void -__gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) +void gnet_stats_add_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b) { unsigned int seq; + u64 bytes = 0; + u64 packets = 0; if (cpu) { - __gnet_stats_copy_basic_cpu(bstats, cpu); + gnet_stats_add_basic_cpu(bstats, cpu); return; } do { if (running) seq = read_seqcount_begin(running); - bstats->bytes = b->bytes; - bstats->packets = b->packets; + bytes = b->bytes; + packets = b->packets; } while (running && read_seqcount_retry(running, seq)); + + bstats->bytes += bytes; + bstats->packets += packets; } -EXPORT_SYMBOL(__gnet_stats_copy_basic); +EXPORT_SYMBOL(gnet_stats_add_basic); static int ___gnet_stats_copy_basic(const seqcount_t *running, @@ -166,7 +169,7 @@ ___gnet_stats_copy_basic(const seqcount_t *running, { struct gnet_stats_basic_packed bstats = {0}; - __gnet_stats_copy_basic(running, &bstats, cpu, b); + gnet_stats_add_basic(running, &bstats, cpu, b); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { d->tc_stats.bytes = bstats.bytes; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index e04f1a87642b..1edd98a50e33 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -147,9 +147,8 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) if (qdisc_is_percpu_stats(qdisc)) { qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(NULL, &sch->bstats, + qdisc->cpu_bstats, &qdisc->bstats); __gnet_stats_copy_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats, qlen); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index e1904e62425e..4bae601e15e1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -405,9 +405,8 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (qdisc_is_percpu_stats(qdisc)) { __u32 qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(NULL, &sch->bstats, + qdisc->cpu_bstats, &qdisc->bstats); __gnet_stats_copy_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats, qlen); @@ -535,9 +534,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (qdisc_is_percpu_stats(qdisc)) { qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &bstats, - qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(NULL, &bstats, + qdisc->cpu_bstats, + &qdisc->bstats); __gnet_stats_copy_queue(&qstats, qdisc->cpu_qstats, &qdisc->qstats, From 448e163f8b9b2dab4c07c47c9e35c9116dec9489 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 16 Oct 2021 10:49:03 +0200 Subject: [PATCH 280/440] gen_stats: Add gnet_stats_add_queue(). This function will replace __gnet_stats_copy_queue(). It reads all arguments and adds them into the passed gnet_stats_queue argument. In contrast to __gnet_stats_copy_queue() it also copies the qlen member. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/net/gen_stats.h | 3 +++ net/core/gen_stats.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 25740d004bdb..148f0ba85f25 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -62,6 +62,9 @@ int gnet_stats_copy_queue(struct gnet_dump *d, void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, const struct gnet_stats_queue __percpu *cpu_q, const struct gnet_stats_queue *q, __u32 qlen); +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu_q, + const struct gnet_stats_queue *q); int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 25d7c0989b83..26c020a7ead4 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -321,6 +321,38 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, } EXPORT_SYMBOL(__gnet_stats_copy_queue); +static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *q) +{ + int i; + + for_each_possible_cpu(i) { + const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); + + qstats->qlen += qcpu->backlog; + qstats->backlog += qcpu->backlog; + qstats->drops += qcpu->drops; + qstats->requeues += qcpu->requeues; + qstats->overlimits += qcpu->overlimits; + } +} + +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu, + const struct gnet_stats_queue *q) +{ + if (cpu) { + gnet_stats_add_queue_cpu(qstats, cpu); + } else { + qstats->qlen += q->qlen; + qstats->backlog += q->backlog; + qstats->drops += q->drops; + qstats->requeues += q->requeues; + qstats->overlimits += q->overlimits; + } +} +EXPORT_SYMBOL(gnet_stats_add_queue); + /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV * @d: dumping handle From 7361df4606ba5ab6b998f4467496b4bbf4e5526b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 16 Oct 2021 10:49:04 +0200 Subject: [PATCH 281/440] mq, mqprio: Use gnet_stats_add_queue(). gnet_stats_add_basic() and gnet_stats_add_queue() add up the statistics so they can be used directly for both the per-CPU and global case. gnet_stats_add_queue() copies either Qdisc's per-CPU gnet_stats_queue::qlen or the global member. The global gnet_stats_queue::qlen isn't touched in the per-CPU case so there is no need to consider it in the global-case. In the per-CPU case, the sum of global gnet_stats_queue::qlen and the per-CPU gnet_stats_queue::qlen was assigned to sch->q.qlen and sch->qstats.qlen. Now both fields are copied individually. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/sched/sch_mq.c | 24 +++++---------------- net/sched/sch_mqprio.c | 49 +++++++++++------------------------------- 2 files changed, 17 insertions(+), 56 deletions(-) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 1edd98a50e33..9d58ecb4e80c 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -130,7 +130,6 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int ntx; - __u32 qlen = 0; sch->q.qlen = 0; memset(&sch->bstats, 0, sizeof(sch->bstats)); @@ -145,24 +144,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); - gnet_stats_add_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); - sch->q.qlen += qlen; - } else { - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.qlen += qdisc->qstats.qlen; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; - } + gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats); + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 4bae601e15e1..57427b40f0d2 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -402,24 +402,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - __u32 qlen = qdisc_qlen_sum(qdisc); - - gnet_stats_add_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); - sch->q.qlen += qlen; - } else { - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; - } + gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats); + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } @@ -511,7 +498,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, { if (cl >= TC_H_MIN_PRIORITY) { int i; - __u32 qlen = 0; + __u32 qlen; struct gnet_stats_queue qstats = {0}; struct gnet_stats_basic_packed bstats = {0}; struct net_device *dev = qdisc_dev(sch); @@ -531,27 +518,15 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); + gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats, + &qdisc->bstats); + gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); - gnet_stats_add_basic(NULL, &bstats, - qdisc->cpu_bstats, - &qdisc->bstats); - __gnet_stats_copy_queue(&qstats, - qdisc->cpu_qstats, - &qdisc->qstats, - qlen); - } else { - qlen += qdisc->q.qlen; - bstats.bytes += qdisc->bstats.bytes; - bstats.packets += qdisc->bstats.packets; - qstats.backlog += qdisc->qstats.backlog; - qstats.drops += qdisc->qstats.drops; - qstats.requeues += qdisc->qstats.requeues; - qstats.overlimits += qdisc->qstats.overlimits; - } spin_unlock_bh(qdisc_lock(qdisc)); } + qlen = qdisc_qlen(sch) + qstats.qlen; /* Reclaim root sleeping lock before completing stats */ if (d->lock) From 10940eb746d417734fb9e5eba6df927e593e4f13 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 16 Oct 2021 10:49:05 +0200 Subject: [PATCH 282/440] gen_stats: Move remaining users to gnet_stats_add_queue(). The gnet_stats_queue::qlen member is only used in the SMP-case. qdisc_qstats_qlen_backlog() needs to add qdisc_qlen() to qstats.qlen to have the same value as that provided by qdisc_qlen_sum(). gnet_stats_copy_queue() needs to overwritte the resulting qstats.qlen field whith the caller submitted qlen value. It might be differ from the submitted value. Let both functions use gnet_stats_add_queue() and remove unused __gnet_stats_copy_queue(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/net/gen_stats.h | 3 --- include/net/sch_generic.h | 5 ++--- net/core/gen_stats.c | 39 ++------------------------------------- 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 148f0ba85f25..d47155f5db5d 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -59,9 +59,6 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d, int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu_q, - const struct gnet_stats_queue *q, __u32 qlen); void gnet_stats_add_queue(struct gnet_stats_queue *qstats, const struct gnet_stats_queue __percpu *cpu_q, const struct gnet_stats_queue *q); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5a011f8d394e..7bc2d30b5c06 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -972,10 +972,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, __u32 *backlog) { struct gnet_stats_queue qstats = { 0 }; - __u32 len = qdisc_qlen_sum(sch); - __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); - *qlen = qstats.qlen; + gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats); + *qlen = qstats.qlen + qdisc_qlen(sch); *backlog = qstats.backlog; } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 26c020a7ead4..6ec11289140b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -285,42 +285,6 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, } EXPORT_SYMBOL(gnet_stats_copy_rate_est); -static void -__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *q) -{ - int i; - - for_each_possible_cpu(i) { - const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - - qstats->qlen = 0; - qstats->backlog += qcpu->backlog; - qstats->drops += qcpu->drops; - qstats->requeues += qcpu->requeues; - qstats->overlimits += qcpu->overlimits; - } -} - -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu, - const struct gnet_stats_queue *q, - __u32 qlen) -{ - if (cpu) { - __gnet_stats_copy_queue_cpu(qstats, cpu); - } else { - qstats->qlen = q->qlen; - qstats->backlog = q->backlog; - qstats->drops = q->drops; - qstats->requeues = q->requeues; - qstats->overlimits = q->overlimits; - } - - qstats->qlen = qlen; -} -EXPORT_SYMBOL(__gnet_stats_copy_queue); - static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, const struct gnet_stats_queue __percpu *q) { @@ -374,7 +338,8 @@ gnet_stats_copy_queue(struct gnet_dump *d, { struct gnet_stats_queue qstats = {0}; - __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen); + gnet_stats_add_queue(&qstats, cpu_q, q); + qstats.qlen = qlen; if (d->compat_tc_stats) { d->tc_stats.drops = qstats.drops; From f2efdb17928924c9c935c136dea764a081032006 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sat, 16 Oct 2021 10:49:06 +0200 Subject: [PATCH 283/440] u64_stats: Introduce u64_stats_set() Allow to directly set a u64_stats_t value which is used to provide an init function which sets it directly to zero intead of memset() the value. Add u64_stats_set() to the u64_stats API. [bigeasy: commit message. ] Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index e81856c0ba13..e8ec116c916b 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) return local64_read(&p->v); } +static inline void u64_stats_set(u64_stats_t *p, u64 val) +{ + local64_set(&p->v, val); +} + static inline void u64_stats_add(u64_stats_t *p, unsigned long val) { local64_add(val, &p->v); @@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) return p->v; } +static inline void u64_stats_set(u64_stats_t *p, u64 val) +{ + p->v = val; +} + static inline void u64_stats_add(u64_stats_t *p, unsigned long val) { p->v += val; From 67c9e6270f3013e4d86ec57c4e7f27459f2a0652 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sat, 16 Oct 2021 10:49:07 +0200 Subject: [PATCH 284/440] net: sched: Protect Qdisc::bstats with u64_stats The not-per-CPU variant of qdisc tc (traffic control) statistics, Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running sequence counter. This sequence counter is used for reliably protecting bstats reads from parallel writes. Meanwhile, the seqcount's write section covers a much wider area than bstats update: qdisc_run_begin() => qdisc_run_end(). That read/write section asymmetry can lead to needless retries of the read section. To prepare for removing the Qdisc::running sequence counter altogether, introduce a u64_stats sync point inside bstats instead. Modify _bstats_update() to start/end the bstats u64_stats write section. For bisectability, and finer commits granularity, the bstats read section is still protected with a Qdisc::running read/retry loop and qdisc_run_begin/end() still starts/ends that seqcount write section. Once all call sites are modified to use _bstats_update(), the Qdisc::running seqcount will be removed and bstats read/retry loop will be modified to utilize the internal u64_stats sync point. Note, using u64_stats implies no sequence counter protection for 64-bit architectures. This can lead to the statistics "packets" vs. "bytes" values getting out of sync on rare occasions. The individual values will still be valid. [bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.] Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/net/gen_stats.h | 2 ++ include/net/sch_generic.h | 2 ++ net/core/gen_estimator.c | 2 +- net/core/gen_stats.c | 14 ++++++++++++-- net/netfilter/xt_RATEEST.c | 1 + net/sched/act_api.c | 2 ++ net/sched/sch_atm.c | 1 + net/sched/sch_cbq.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_ets.c | 2 +- net/sched/sch_generic.c | 1 + net/sched/sch_gred.c | 4 +++- net/sched/sch_hfsc.c | 1 + net/sched/sch_htb.c | 7 +++++-- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 5 +++-- net/sched/sch_qfq.c | 1 + 17 files changed, 39 insertions(+), 10 deletions(-) diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index d47155f5db5d..304d792f7977 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -11,6 +11,7 @@ struct gnet_stats_basic_packed { __u64 bytes; __u64 packets; + struct u64_stats_sync syncp; }; struct gnet_stats_basic_cpu { @@ -34,6 +35,7 @@ struct gnet_dump { struct tc_stats tc_stats; }; +void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b); int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7bc2d30b5c06..d7746aea3cec 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -852,8 +852,10 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, __u64 bytes, __u32 packets) { + u64_stats_update_begin(&bstats->syncp); bstats->bytes += bytes; bstats->packets += packets; + u64_stats_update_end(&bstats->syncp); } static inline void bstats_update(struct gnet_stats_basic_packed *bstats, diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 205df8b5116e..64978e77368f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -62,7 +62,7 @@ struct net_rate_estimator { static void est_fetch_counters(struct net_rate_estimator *e, struct gnet_stats_basic_packed *b) { - memset(b, 0, sizeof(*b)); + gnet_stats_basic_packed_init(b); if (e->stats_lock) spin_lock(e->stats_lock); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 6ec11289140b..f2e12fe7112b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -18,7 +18,7 @@ #include #include #include - +#include static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) @@ -114,6 +114,15 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, } EXPORT_SYMBOL(gnet_stats_start_copy); +/* Must not be inlined, due to u64_stats seqcount_t lockdep key */ +void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b) +{ + b->bytes = 0; + b->packets = 0; + u64_stats_init(&b->syncp); +} +EXPORT_SYMBOL(gnet_stats_basic_packed_init); + static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu) { @@ -167,8 +176,9 @@ ___gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_packed *b, int type) { - struct gnet_stats_basic_packed bstats = {0}; + struct gnet_stats_basic_packed bstats; + gnet_stats_basic_packed_init(&bstats); gnet_stats_add_basic(running, &bstats, cpu, b); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 0d5c422f8745..d5200725ca62 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) if (!est) goto err1; + gnet_stats_basic_packed_init(&est->bstats); strlcpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 7dd3a2dc5fa4..0302dad42df1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, if (!p->cpu_qstats) goto err3; } + gnet_stats_basic_packed_init(&p->tcfa_bstats); + gnet_stats_basic_packed_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 7d8518176b45..c8e1771383f9 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); INIT_LIST_HEAD(&p->link.list); + gnet_stats_basic_packed_init(&p->link.bstats); list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e0da15530f0e..d01f6ec315f8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; + gnet_stats_basic_packed_init(&cl->bstats); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 642cd179b7a7..319906e19a6b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + gnet_stats_basic_packed_init(&cl->bstats); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index ed86b7021f6d..83693107371f 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].qdisc = NULL; q->classes[i].quantum = 0; q->classes[i].deficit = 0; - memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats)); + gnet_stats_basic_packed_init(&q->classes[i].bstats); memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); } return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8c64a552a64f..ef27ff3ddee4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); qdisc_skb_head_init(&sch->q); + gnet_stats_basic_packed_init(&sch->bstats); spin_lock_init(&sch->q.lock); if (ops->static_flags & TCQ_F_CPUSTATS) { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 621dc6afde8f..2ddcbb2efdbb 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -364,9 +364,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch) hw_stats->handle = sch->handle; hw_stats->parent = sch->parent; - for (i = 0; i < MAX_DPs; i++) + for (i = 0; i < MAX_DPs; i++) { + gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; + } ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); /* Even if driver returns failure adjust the stats - in case offload diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b7ac30cca035..ff6ff54806fc 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + gnet_stats_basic_packed_init(&q->root.bstats); q->root.cl_common.classid = sch->handle; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5067a6e5d4fd..2e805b17efcf 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, struct htb_class *c; unsigned int i; - memset(&cl->bstats, 0, sizeof(cl->bstats)); + gnet_stats_basic_packed_init(&cl->bstats); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { @@ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (cl->leaf.q) cl->bstats = cl->leaf.q->bstats; else - memset(&cl->bstats, 0, sizeof(cl->bstats)); + gnet_stats_basic_packed_init(&cl->bstats); cl->bstats.bytes += cl->bstats_bias.bytes; cl->bstats.packets += cl->bstats_bias.packets; } else { @@ -1849,6 +1849,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; + gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_packed_init(&cl->bstats_bias); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 9d58ecb4e80c..704e14a58f09 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned int ntx; sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); + gnet_stats_basic_packed_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 57427b40f0d2..fe6b4a178fc9 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned int ntx, tc; sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); + gnet_stats_basic_packed_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs @@ -500,10 +500,11 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, int i; __u32 qlen; struct gnet_stats_queue qstats = {0}; - struct gnet_stats_basic_packed bstats = {0}; + struct gnet_stats_basic_packed bstats; struct net_device *dev = qdisc_dev(sch); struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; + gnet_stats_basic_packed_init(&bstats); /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we * hold here is the look on dev_queue->qdisc_sleeping diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 58a9d42b52b8..b6d989b69324 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + gnet_stats_basic_packed_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; From f56940daa5a74fb20b5f5487535549949f2d8d0c Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sat, 16 Oct 2021 10:49:08 +0200 Subject: [PATCH 285/440] net: sched: Use _bstats_update/set() instead of raw writes The Qdisc::running sequence counter, used to protect Qdisc::bstats reads from parallel writes, is in the process of being removed. Qdisc::bstats read/writes will synchronize using an internal u64_stats sync point instead. Modify all bstats writes to use _bstats_update(). This ensures that the internal u64_stats sync point is always acquired and released as appropriate. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/core/gen_stats.c | 9 +++++---- net/sched/sch_cbq.c | 3 +-- net/sched/sch_gred.c | 7 ++++--- net/sched/sch_htb.c | 25 +++++++++++++++---------- net/sched/sch_qfq.c | 3 +-- 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f2e12fe7112b..69576972a25f 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -126,6 +126,7 @@ EXPORT_SYMBOL(gnet_stats_basic_packed_init); static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu) { + u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { @@ -139,9 +140,10 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, packets = bcpu->bstats.packets; } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - bstats->bytes += bytes; - bstats->packets += packets; + t_bytes += bytes; + t_packets += packets; } + _bstats_update(bstats, t_bytes, t_packets); } void gnet_stats_add_basic(const seqcount_t *running, @@ -164,8 +166,7 @@ void gnet_stats_add_basic(const seqcount_t *running, packets = b->packets; } while (running && read_seqcount_retry(running, seq)); - bstats->bytes += bytes; - bstats->packets += packets; + _bstats_update(bstats, bytes, packets); } EXPORT_SYMBOL(gnet_stats_add_basic); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d01f6ec315f8..ef9e87175d35 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q) long avgidle = cl->avgidle; long idle; - cl->bstats.packets++; - cl->bstats.bytes += len; + _bstats_update(&cl->bstats, len, 1); /* * (now - last) is total time between packet right edges. diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 2ddcbb2efdbb..02b03d6d24ea 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -353,6 +353,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt_offload *hw_stats; + u64 bytes = 0, packets = 0; unsigned int i; int ret; @@ -381,15 +382,15 @@ static int gred_offload_dump_stats(struct Qdisc *sch) table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; - _bstats_update(&sch->bstats, - hw_stats->stats.bstats[i].bytes, - hw_stats->stats.bstats[i].packets); + bytes += hw_stats->stats.bstats[i].bytes; + packets += hw_stats->stats.bstats[i].packets; sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } + _bstats_update(&sch->bstats, bytes, packets); kfree(hw_stats); return ret; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2e805b17efcf..324ecfdf842a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1308,6 +1308,7 @@ nla_put_failure: static void htb_offload_aggregate_stats(struct htb_sched *q, struct htb_class *cl) { + u64 bytes = 0, packets = 0; struct htb_class *c; unsigned int i; @@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, if (p != cl) continue; - cl->bstats.bytes += c->bstats_bias.bytes; - cl->bstats.packets += c->bstats_bias.packets; + bytes += c->bstats_bias.bytes; + packets += c->bstats_bias.packets; if (c->level == 0) { - cl->bstats.bytes += c->leaf.q->bstats.bytes; - cl->bstats.packets += c->leaf.q->bstats.packets; + bytes += c->leaf.q->bstats.bytes; + packets += c->leaf.q->bstats.packets; } } } + _bstats_update(&cl->bstats, bytes, packets); } static int @@ -1358,8 +1360,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->bstats = cl->leaf.q->bstats; else gnet_stats_basic_packed_init(&cl->bstats); - cl->bstats.bytes += cl->bstats_bias.bytes; - cl->bstats.packets += cl->bstats_bias.packets; + _bstats_update(&cl->bstats, + cl->bstats_bias.bytes, + cl->bstats_bias.packets); } else { htb_offload_aggregate_stats(q, cl); } @@ -1578,8 +1581,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, WARN_ON(old != q); if (cl->parent) { - cl->parent->bstats_bias.bytes += q->bstats.bytes; - cl->parent->bstats_bias.packets += q->bstats.packets; + _bstats_update(&cl->parent->bstats_bias, + q->bstats.bytes, + q->bstats.packets); } offload_opt = (struct tc_htb_qopt_offload) { @@ -1925,8 +1929,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, htb_graft_helper(dev_queue, old_q); goto err_kill_estimator; } - parent->bstats_bias.bytes += old_q->bstats.bytes; - parent->bstats_bias.packets += old_q->bstats.packets; + _bstats_update(&parent->bstats_bias, + old_q->bstats.bytes, + old_q->bstats.packets); qdisc_put(old_q); } new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b6d989b69324..bea68c91027a 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1235,8 +1235,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - cl->bstats.bytes += len; - cl->bstats.packets += gso_segs; + _bstats_update(&cl->bstats, len, gso_segs); sch->qstats.backlog += len; ++sch->q.qlen; From 50dc9a8572aa4d7cdc56670228fcde40289ed289 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sat, 16 Oct 2021 10:49:09 +0200 Subject: [PATCH 286/440] net: sched: Merge Qdisc::bstats and Qdisc::cpu_bstats data types The only factor differentiating per-CPU bstats data type (struct gnet_stats_basic_cpu) from the packed non-per-CPU one (struct gnet_stats_basic_packed) was a u64_stats sync point inside the former. The two data types are now equivalent: earlier commits added a u64_stats sync point to the latter. Combine both data types into "struct gnet_stats_basic_sync". This eliminates redundancy and simplifies the bstats read/write APIs. Use u64_stats_t for bstats "packets" and "bytes" data types. On 64-bit architectures, u64_stats sync points do not use sequence counter protection. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/abm/qdisc.c | 2 +- include/net/act_api.h | 10 +-- include/net/gen_stats.h | 44 ++++++------- include/net/netfilter/xt_rateest.h | 2 +- include/net/pkt_cls.h | 4 +- include/net/sch_generic.h | 34 +++------- net/core/gen_estimator.c | 36 ++++++----- net/core/gen_stats.c | 62 ++++++++++--------- net/netfilter/xt_RATEEST.c | 8 +-- net/sched/act_api.c | 14 ++--- net/sched/act_bpf.c | 2 +- net/sched/act_ife.c | 4 +- net/sched/act_mpls.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 3 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_atm.c | 4 +- net/sched/sch_cbq.c | 4 +- net/sched/sch_drr.c | 4 +- net/sched/sch_ets.c | 4 +- net/sched/sch_generic.c | 4 +- net/sched/sch_gred.c | 10 +-- net/sched/sch_hfsc.c | 4 +- net/sched/sch_htb.c | 32 +++++----- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 6 +- net/sched/sch_qfq.c | 4 +- 30 files changed, 155 insertions(+), 160 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c index 2473fb5f75e5..2a5cc64227e9 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c +++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c @@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle, static void nfp_abm_stats_calculate(struct nfp_alink_stats *new, struct nfp_alink_stats *old, - struct gnet_stats_basic_packed *bstats, + struct gnet_stats_basic_sync *bstats, struct gnet_stats_queue *qstats) { _bstats_update(bstats, new->tx_bytes - old->tx_bytes, diff --git a/include/net/act_api.h b/include/net/act_api.h index f19f7f4a463c..b5b624c7e488 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -30,13 +30,13 @@ struct tc_action { atomic_t tcfa_bindcnt; int tcfa_action; struct tcf_t tcfa_tm; - struct gnet_stats_basic_packed tcfa_bstats; - struct gnet_stats_basic_packed tcfa_bstats_hw; + struct gnet_stats_basic_sync tcfa_bstats; + struct gnet_stats_basic_sync tcfa_bstats_hw; struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; - struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; + struct gnet_stats_basic_sync __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; @@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a, struct sk_buff *skb) { if (likely(a->cpu_bstats)) { - bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); + bstats_update(this_cpu_ptr(a->cpu_bstats), skb); return; } spin_lock(&a->tcfa_lock); diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 304d792f7977..52b87588f467 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -7,15 +7,17 @@ #include #include -/* Note: this used to be in include/uapi/linux/gen_stats.h */ -struct gnet_stats_basic_packed { - __u64 bytes; - __u64 packets; - struct u64_stats_sync syncp; -}; - -struct gnet_stats_basic_cpu { - struct gnet_stats_basic_packed bstats; +/* Throughput stats. + * Must be initialized beforehand with gnet_stats_basic_sync_init(). + * + * If no reads can ever occur parallel to writes (e.g. stack-allocated + * bstats), then the internal stat values can be written to and read + * from directly. Otherwise, use _bstats_set/update() for writes and + * gnet_stats_add_basic() for reads. + */ +struct gnet_stats_basic_sync { + u64_stats_t bytes; + u64_stats_t packets; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -35,7 +37,7 @@ struct gnet_dump { struct tc_stats tc_stats; }; -void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b); +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b); int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); @@ -46,16 +48,16 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int gnet_stats_copy_basic(const seqcount_t *running, struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b); void gnet_stats_add_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); + struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b); int gnet_stats_copy_basic_hw(const seqcount_t *running, struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, @@ -68,14 +70,14 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); int gnet_stats_finish_copy(struct gnet_dump *d); -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, seqcount_t *running, struct nlattr *opt); void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **ptr, spinlock_t *lock, seqcount_t *running, struct nlattr *opt); diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 832ab69efda5..4c3809e141f4 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -6,7 +6,7 @@ struct xt_rateest { /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; spinlock_t lock; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 83a6d0792180..4a5833108083 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -765,7 +765,7 @@ struct tc_cookie { }; struct tc_qopt_offload_stats { - struct gnet_stats_basic_packed *bstats; + struct gnet_stats_basic_sync *bstats; struct gnet_stats_queue *qstats; }; @@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params { }; struct tc_gred_qopt_offload_stats { - struct gnet_stats_basic_packed bstats[MAX_DPs]; + struct gnet_stats_basic_sync bstats[MAX_DPs]; struct gnet_stats_queue qstats[MAX_DPs]; struct red_stats *xstats[MAX_DPs]; }; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d7746aea3cec..7882e3aa6448 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -97,7 +97,7 @@ struct Qdisc { struct netdev_queue *dev_queue; struct net_rate_estimator __rcu *rate_est; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; int pad; refcount_t refcnt; @@ -107,7 +107,7 @@ struct Qdisc { */ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; @@ -849,16 +849,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return sch->enqueue(skb, sch, to_free); } -static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, +static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, __u64 bytes, __u32 packets) { u64_stats_update_begin(&bstats->syncp); - bstats->bytes += bytes; - bstats->packets += packets; + u64_stats_add(&bstats->bytes, bytes); + u64_stats_add(&bstats->packets, packets); u64_stats_update_end(&bstats->syncp); } -static inline void bstats_update(struct gnet_stats_basic_packed *bstats, +static inline void bstats_update(struct gnet_stats_basic_sync *bstats, const struct sk_buff *skb) { _bstats_update(bstats, @@ -866,26 +866,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); } -static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, - __u64 bytes, __u32 packets) -{ - u64_stats_update_begin(&bstats->syncp); - _bstats_update(&bstats->bstats, bytes, packets); - u64_stats_update_end(&bstats->syncp); -} - -static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, - const struct sk_buff *skb) -{ - u64_stats_update_begin(&bstats->syncp); - bstats_update(&bstats->bstats, skb); - u64_stats_update_end(&bstats->syncp); -} - static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, const struct sk_buff *skb) { - bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); + bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); } static inline void qdisc_bstats_update(struct Qdisc *sch, @@ -1317,7 +1301,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64); struct mini_Qdisc { struct tcf_proto *filter_list; struct tcf_block *block; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; }; @@ -1325,7 +1309,7 @@ struct mini_Qdisc { static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, const struct sk_buff *skb) { - bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); + bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb); } static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 64978e77368f..a73ad0bf324c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -40,10 +40,10 @@ */ struct net_rate_estimator { - struct gnet_stats_basic_packed *bstats; + struct gnet_stats_basic_sync *bstats; spinlock_t *stats_lock; seqcount_t *running; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_sync __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ @@ -60,9 +60,9 @@ struct net_rate_estimator { }; static void est_fetch_counters(struct net_rate_estimator *e, - struct gnet_stats_basic_packed *b) + struct gnet_stats_basic_sync *b) { - gnet_stats_basic_packed_init(b); + gnet_stats_basic_sync_init(b); if (e->stats_lock) spin_lock(e->stats_lock); @@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e, static void est_timer(struct timer_list *t) { struct net_rate_estimator *est = from_timer(est, t, timer); - struct gnet_stats_basic_packed b; + struct gnet_stats_basic_sync b; + u64 b_bytes, b_packets; u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); + b_bytes = u64_stats_read(&b.bytes); + b_packets = u64_stats_read(&b.packets); + + brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - rate = (b.packets - est->last_packets) << (10 - est->intvl_log); + rate = (b_packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); @@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t) est->avpps += rate; write_seqcount_end(&est->seq); - est->last_bytes = b.bytes; - est->last_packets = b.packets; + est->last_bytes = b_bytes; + est->last_packets = b_packets; est->next_jiffies += ((HZ/4) << est->intvl_log); @@ -121,8 +125,8 @@ static void est_timer(struct timer_list *t) * Returns 0 on success or a negative error code. * */ -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, seqcount_t *running, @@ -130,7 +134,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, { struct gnet_estimator *parm = nla_data(opt); struct net_rate_estimator *old, *est; - struct gnet_stats_basic_packed b; + struct gnet_stats_basic_sync b; int intvl_log; if (nla_len(opt) < sizeof(*parm)) @@ -164,8 +168,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est_fetch_counters(est, &b); if (lock) local_bh_enable(); - est->last_bytes = b.bytes; - est->last_packets = b.packets; + est->last_bytes = u64_stats_read(&b.bytes); + est->last_packets = u64_stats_read(&b.packets); if (lock) spin_lock_bh(lock); @@ -222,8 +226,8 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, seqcount_t *running, struct nlattr *opt) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 69576972a25f..5f57f761def6 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -115,29 +115,29 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, EXPORT_SYMBOL(gnet_stats_start_copy); /* Must not be inlined, due to u64_stats seqcount_t lockdep key */ -void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b) +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) { - b->bytes = 0; - b->packets = 0; + u64_stats_set(&b->bytes, 0); + u64_stats_set(&b->packets, 0); u64_stats_init(&b->syncp); } -EXPORT_SYMBOL(gnet_stats_basic_packed_init); +EXPORT_SYMBOL(gnet_stats_basic_sync_init); -static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu) +static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu) { u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { - struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i); + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); unsigned int start; u64 bytes, packets; do { start = u64_stats_fetch_begin_irq(&bcpu->syncp); - bytes = bcpu->bstats.bytes; - packets = bcpu->bstats.packets; + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); t_bytes += bytes; @@ -147,9 +147,9 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, } void gnet_stats_add_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) + struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b) { unsigned int seq; u64 bytes = 0; @@ -162,8 +162,8 @@ void gnet_stats_add_basic(const seqcount_t *running, do { if (running) seq = read_seqcount_begin(running); - bytes = b->bytes; - packets = b->packets; + bytes = u64_stats_read(&b->bytes); + packets = u64_stats_read(&b->packets); } while (running && read_seqcount_retry(running, seq)); _bstats_update(bstats, bytes, packets); @@ -173,18 +173,22 @@ EXPORT_SYMBOL(gnet_stats_add_basic); static int ___gnet_stats_copy_basic(const seqcount_t *running, struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, int type) { - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; + u64 bstats_bytes, bstats_packets; - gnet_stats_basic_packed_init(&bstats); + gnet_stats_basic_sync_init(&bstats); gnet_stats_add_basic(running, &bstats, cpu, b); + bstats_bytes = u64_stats_read(&bstats.bytes); + bstats_packets = u64_stats_read(&bstats.packets); + if (d->compat_tc_stats && type == TCA_STATS_BASIC) { - d->tc_stats.bytes = bstats.bytes; - d->tc_stats.packets = bstats.packets; + d->tc_stats.bytes = bstats_bytes; + d->tc_stats.packets = bstats_packets; } if (d->tail) { @@ -192,14 +196,14 @@ ___gnet_stats_copy_basic(const seqcount_t *running, int res; memset(&sb, 0, sizeof(sb)); - sb.bytes = bstats.bytes; - sb.packets = bstats.packets; + sb.bytes = bstats_bytes; + sb.packets = bstats_packets; res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); - if (res < 0 || sb.packets == bstats.packets) + if (res < 0 || sb.packets == bstats_packets) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets, - sizeof(bstats.packets), TCA_STATS_PAD); + return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, + sizeof(bstats_packets), TCA_STATS_PAD); } return 0; } @@ -220,8 +224,8 @@ ___gnet_stats_copy_basic(const seqcount_t *running, int gnet_stats_copy_basic(const seqcount_t *running, struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b) { return ___gnet_stats_copy_basic(running, d, cpu, b, TCA_STATS_BASIC); @@ -244,8 +248,8 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_basic_hw(const seqcount_t *running, struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b) { return ___gnet_stats_copy_basic(running, d, cpu, b, TCA_STATS_BASIC_HW); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index d5200725ca62..8aec1b529364 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -94,11 +94,11 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic_packed *stats = &info->est->bstats; + struct gnet_stats_basic_sync *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); - stats->bytes += skb->len; - stats->packets++; + u64_stats_add(&stats->bytes, skb->len); + u64_stats_inc(&stats->packets); spin_unlock_bh(&info->est->lock); return XT_CONTINUE; @@ -143,7 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) if (!est) goto err1; - gnet_stats_basic_packed_init(&est->bstats); + gnet_stats_basic_sync_init(&est->bstats); strlcpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 0302dad42df1..585829ffa0c4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -480,18 +480,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { - p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats) goto err1; - p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } - gnet_stats_basic_packed_init(&p->tcfa_bstats); - gnet_stats_basic_packed_init(&p->tcfa_bstats_hw); + gnet_stats_basic_sync_init(&p->tcfa_bstats); + gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; @@ -1128,13 +1128,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), - bytes, packets); + _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); return; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 5c36013339e1..f2bf896331a5 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, int action, filter_res; tcf_lastuse_update(&prog->tcf_tm); - bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); filter = rcu_dereference(prog->filter); if (at_ingress) { diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 7064a365a1a9..b757f90a2d58 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (skb_at_tc_ingress(skb)) @@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index e4529b428cf4..8faa4c58305e 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, int ret, mac_len; tcf_lastuse_update(&m->tcf_tm); - bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling mpls manipulating * functions. diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 832157a840fc..c9383805222d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, int ret; tcf_lastuse_update(&police->tcf_tm); - bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb); ret = READ_ONCE(police->tcf_action); p = rcu_dereference_bh(police->params); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 230501eb9e06..ce859b0e0deb 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, int retval; tcf_lastuse_update(&s->tcf_tm); - bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); retval = READ_ONCE(s->tcf_action); psample_group = rcu_dereference_bh(s->psample_group); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index cbbe1861d3a2..e617ab4505ca 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, * then it would look like "hello_3" (without quotes) */ pr_info("simple: %s_%llu\n", - (char *)d->tcfd_defdata, d->tcf_bstats.packets); + (char *)d->tcfd_defdata, + u64_stats_read(&d->tcf_bstats.packets)); spin_unlock(&d->tcf_lock); return d->tcf_action; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 605418538347..d30ecbfc8f84 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, int action; tcf_lastuse_update(&d->tcf_tm); - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index ecb9ee666095..9b6b52c5e24e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, u64 flags; tcf_lastuse_update(&d->tcf_tm); - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); action = READ_ONCE(d->tcf_action); if (unlikely(action == TC_ACT_SHOT)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 91820f67275c..70f006cbf212 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -885,7 +885,7 @@ static void qdisc_offload_graft_root(struct net_device *dev, static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index c8e1771383f9..fbfe4ce9497b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -52,7 +52,7 @@ struct atm_flow_data { struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ int ref; /* reference count */ - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct list_head list; struct atm_flow_data *excess; /* flow for excess traffic; @@ -548,7 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_packed_init(&p->link.bstats); + gnet_stats_basic_sync_init(&p->link.bstats); list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ef9e87175d35..f0b1282fae11 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -116,7 +116,7 @@ struct cbq_class { long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tc_cbq_xstats xstats; @@ -1610,7 +1610,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 319906e19a6b..7243617a3595 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -19,7 +19,7 @@ struct drr_class { struct Qdisc_class_common common; unsigned int filter_cnt; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct list_head alist; @@ -106,7 +106,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 83693107371f..af56d155e7fc 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -41,7 +41,7 @@ struct ets_class { struct Qdisc *qdisc; u32 quantum; u32 deficit; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; }; @@ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].qdisc = NULL; q->classes[i].quantum = 0; q->classes[i].deficit = 0; - gnet_stats_basic_packed_init(&q->classes[i].bstats); + gnet_stats_basic_sync_init(&q->classes[i].bstats); memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); } return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ef27ff3ddee4..989186e7f1a0 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -892,12 +892,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); qdisc_skb_head_init(&sch->q); - gnet_stats_basic_packed_init(&sch->bstats); + gnet_stats_basic_sync_init(&sch->bstats); spin_lock_init(&sch->q.lock); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = - netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!sch->cpu_bstats) goto errout1; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 02b03d6d24ea..72de08ef8335 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -366,7 +366,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch) hw_stats->parent = sch->parent; for (i = 0; i < MAX_DPs; i++) { - gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]); + gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; } @@ -378,12 +378,12 @@ static int gred_offload_dump_stats(struct Qdisc *sch) for (i = 0; i < MAX_DPs; i++) { if (!table->tab[i]) continue; - table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets; - table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; + table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); + table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; - bytes += hw_stats->stats.bstats[i].bytes; - packets += hw_stats->stats.bstats[i].packets; + bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); + packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ff6ff54806fc..181c2905ff98 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -111,7 +111,7 @@ enum hfsc_class_flags { struct hfsc_class { struct Qdisc_class_common cl_common; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ @@ -1406,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - gnet_stats_basic_packed_init(&q->root.bstats); + gnet_stats_basic_sync_init(&q->root.bstats); q->root.cl_common.classid = sch->handle; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 324ecfdf842a..adceb9e210f6 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -113,8 +113,8 @@ struct htb_class { /* * Written often fields */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_basic_packed bstats_bias; + struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_sync bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -1312,7 +1312,7 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, struct htb_class *c; unsigned int i; - gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { @@ -1324,11 +1324,11 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, if (p != cl) continue; - bytes += c->bstats_bias.bytes; - packets += c->bstats_bias.packets; + bytes += u64_stats_read(&c->bstats_bias.bytes); + packets += u64_stats_read(&c->bstats_bias.packets); if (c->level == 0) { - bytes += c->leaf.q->bstats.bytes; - packets += c->leaf.q->bstats.packets; + bytes += u64_stats_read(&c->leaf.q->bstats.bytes); + packets += u64_stats_read(&c->leaf.q->bstats.packets); } } } @@ -1359,10 +1359,10 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (cl->leaf.q) cl->bstats = cl->leaf.q->bstats; else - gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats); _bstats_update(&cl->bstats, - cl->bstats_bias.bytes, - cl->bstats_bias.packets); + u64_stats_read(&cl->bstats_bias.bytes), + u64_stats_read(&cl->bstats_bias.packets)); } else { htb_offload_aggregate_stats(q, cl); } @@ -1582,8 +1582,8 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, if (cl->parent) { _bstats_update(&cl->parent->bstats_bias, - q->bstats.bytes, - q->bstats.packets); + u64_stats_read(&q->bstats.bytes), + u64_stats_read(&q->bstats.packets)); } offload_opt = (struct tc_htb_qopt_offload) { @@ -1853,8 +1853,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - gnet_stats_basic_packed_init(&cl->bstats); - gnet_stats_basic_packed_init(&cl->bstats_bias); + gnet_stats_basic_sync_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats_bias); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { @@ -1930,8 +1930,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto err_kill_estimator; } _bstats_update(&parent->bstats_bias, - old_q->bstats.bytes, - old_q->bstats.packets); + u64_stats_read(&old_q->bstats.bytes), + u64_stats_read(&old_q->bstats.packets)); qdisc_put(old_q); } new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 704e14a58f09..cedd0b3ef9cf 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned int ntx; sch->q.qlen = 0; - gnet_stats_basic_packed_init(&sch->bstats); + gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index fe6b4a178fc9..3f7f756f92ca 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned int ntx, tc; sch->q.qlen = 0; - gnet_stats_basic_packed_init(&sch->bstats); + gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs @@ -500,11 +500,11 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, int i; __u32 qlen; struct gnet_stats_queue qstats = {0}; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct net_device *dev = qdisc_dev(sch); struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; - gnet_stats_basic_packed_init(&bstats); + gnet_stats_basic_sync_init(&bstats); /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we * hold here is the look on dev_queue->qdisc_sleeping diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bea68c91027a..a35200f591a2 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -131,7 +131,7 @@ struct qfq_class { unsigned int filter_cnt; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct Qdisc *qdisc; @@ -465,7 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - gnet_stats_basic_packed_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; From 29cbcd85828372333aa87542c51f2b2b0fd4380c Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sat, 16 Oct 2021 10:49:10 +0200 Subject: [PATCH 287/440] net: sched: Remove Qdisc::running sequence counter The Qdisc::running sequence counter has two uses: 1. Reliably reading qdisc's tc statistics while the qdisc is running (a seqcount read/retry loop at gnet_stats_add_basic()). 2. As a flag, indicating whether the qdisc in question is running (without any retry loops). For the first usage, the Qdisc::running sequence counter write section, qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what is actually needed: the raw qdisc's bstats update. A u64_stats sync point was thus introduced (in previous commits) inside the bstats structure itself. A local u64_stats write section is then started and stopped for the bstats updates. Use that u64_stats sync point mechanism for the bstats read/retry loop at gnet_stats_add_basic(). For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag, accessed with atomic bitops, is sufficient. Using a bit flag instead of a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads to the SMP barriers implicitly added through raw_read_seqcount() and write_seqcount_begin/end() getting removed. All call sites have been surveyed though, and no required ordering was identified. Now that the qdisc->running sequence counter is no longer used, remove it. Note, using u64_stats implies no sequence counter protection for 64-bit architectures. This can lead to the qdisc tc statistics "packets" vs. "bytes" values getting out of sync on rare occasions. The individual values will still be valid. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ---- include/net/gen_stats.h | 19 +++++++-------- include/net/sch_generic.h | 33 +++++++++++--------------- net/core/gen_estimator.c | 16 ++++++++----- net/core/gen_stats.c | 50 ++++++++++++++++++++++----------------- net/sched/act_api.c | 9 +++---- net/sched/act_police.c | 2 +- net/sched/sch_api.c | 16 +++---------- net/sched/sch_atm.c | 3 +-- net/sched/sch_cbq.c | 9 +++---- net/sched/sch_drr.c | 10 +++----- net/sched/sch_ets.c | 3 +-- net/sched/sch_generic.c | 10 ++------ net/sched/sch_hfsc.c | 8 +++---- net/sched/sch_htb.c | 7 +++--- net/sched/sch_mq.c | 7 +++--- net/sched/sch_mqprio.c | 14 +++++------ net/sched/sch_multiq.c | 3 +-- net/sched/sch_prio.c | 4 ++-- net/sched/sch_qfq.c | 7 +++--- net/sched/sch_taprio.c | 2 +- 21 files changed, 102 insertions(+), 134 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 173984414f38..f9cd6fea213f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type { * @sfp_bus: attached &struct sfp_bus structure. * * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the @@ -2250,7 +2249,6 @@ struct net_device { struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; - struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; unsigned threaded:1; @@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, #define netdev_lockdep_set_classes(dev) \ { \ static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_running_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ static struct lock_class_key dev_addr_list_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - (dev)->qdisc_running_key = &qdisc_running_key; \ lockdep_set_class(&(dev)->addr_list_lock, \ &dev_addr_list_lock_key); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 52b87588f467..7aa2b8e1fb29 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, +int gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b); -void gnet_stats_add_basic(const seqcount_t *running, - struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync *b, bool running); +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b); -int gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, + struct gnet_stats_basic_sync *b, bool running); +int gnet_stats_copy_basic_hw(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b); + struct gnet_stats_basic_sync *b, bool running); int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, @@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + bool running, struct nlattr *opt); void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **ptr, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + bool running, struct nlattr *opt); bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, struct gnet_stats_rate_est64 *sample); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7882e3aa6448..baad2ab4d971 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -38,6 +38,10 @@ enum qdisc_state_t { __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, __QDISC_STATE_DRAINING, + /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. + * Use qdisc_run_begin/end() or qdisc_is_running() instead. + */ + __QDISC_STATE_RUNNING, }; #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) @@ -108,7 +112,6 @@ struct Qdisc { struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; - seqcount_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; @@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) return NULL; } +/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc + * root_lock section, or provide their own memory barriers -- ordering + * against qdisc_run_begin/end() atomic bit operations. + */ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; + return test_bit(__QDISC_STATE_RUNNING, &qdisc->state); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) return !READ_ONCE(qdisc->q.qlen); } +/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with + * the qdisc root lock acquired. + */ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { @@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) * after it releases the lock at the end of qdisc_run_end(). */ return spin_trylock(&qdisc->seqlock); - } else if (qdisc_is_running(qdisc)) { - return false; } - /* Variant of write_seqcount_begin() telling lockdep a trylock - * was attempted. - */ - raw_write_seqcount_begin(&qdisc->running); - seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); - return true; + return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) &qdisc->state))) __netif_schedule(qdisc); } else { - write_seqcount_end(&qdisc->running); + clear_bit(__QDISC_STATE_RUNNING, &qdisc->state); } } @@ -592,14 +595,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) -{ - struct Qdisc *root = qdisc_root_sleeping(qdisc); - - ASSERT_RTNL(); - return &root->running; -} - static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index a73ad0bf324c..4fcbdd71c59f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -42,7 +42,7 @@ struct net_rate_estimator { struct gnet_stats_basic_sync *bstats; spinlock_t *stats_lock; - seqcount_t *running; + bool running; struct gnet_stats_basic_sync __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ @@ -66,7 +66,7 @@ static void est_fetch_counters(struct net_rate_estimator *e, if (e->stats_lock) spin_lock(e->stats_lock); - gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats); + gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); if (e->stats_lock) spin_unlock(e->stats_lock); @@ -113,7 +113,9 @@ static void est_timer(struct timer_list *t) * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: qdisc running seqcount + * @running: true if @bstats represents a running qdisc, thus @bstats' + * internal values might change during basic reads. Only used + * if @bstats_cpu is NULL * @opt: rate estimator configuration TLV * * Creates a new rate estimator with &bstats as source and &rate_est @@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, + bool running, struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); @@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator); * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: qdisc running seqcount (might be NULL) + * @running: true if @bstats represents a running qdisc, thus @bstats' + * internal values might change during basic reads. Only used + * if @cpu_bstats is NULL * @opt: rate estimator configuration TLV * * Replaces the configuration of a rate estimator by calling @@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt) + bool running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 5f57f761def6..5516ea0d5da0 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, _bstats_update(bstats, t_bytes, t_packets); } -void gnet_stats_add_basic(const seqcount_t *running, - struct gnet_stats_basic_sync *bstats, +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b) + struct gnet_stats_basic_sync *b, bool running) { - unsigned int seq; + unsigned int start; u64 bytes = 0; u64 packets = 0; + WARN_ON_ONCE((cpu || running) && !in_task()); + if (cpu) { gnet_stats_add_basic_cpu(bstats, cpu); return; } do { if (running) - seq = read_seqcount_begin(running); + start = u64_stats_fetch_begin_irq(&b->syncp); bytes = u64_stats_read(&b->bytes); packets = u64_stats_read(&b->packets); - } while (running && read_seqcount_retry(running, seq)); + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); _bstats_update(bstats, bytes, packets); } EXPORT_SYMBOL(gnet_stats_add_basic); static int -___gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, +___gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, - int type) + int type, bool running) { struct gnet_stats_basic_sync bstats; u64 bstats_bytes, bstats_packets; gnet_stats_basic_sync_init(&bstats); - gnet_stats_add_basic(running, &bstats, cpu, b); + gnet_stats_add_basic(&bstats, cpu, b, running); bstats_bytes = u64_stats_read(&bstats.bytes); bstats_packets = u64_stats_read(&bstats.packets); @@ -210,10 +210,14 @@ ___gnet_stats_copy_basic(const seqcount_t *running, /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV - * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics + * @running: true if @b represents a running qdisc, thus @b's + * internal values might change during basic reads. + * Only used if @cpu is NULL + * + * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -222,22 +226,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b) + struct gnet_stats_basic_sync *b, + bool running) { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC); + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); } EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV - * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics + * @running: true if @b represents a running qdisc, thus @b's + * internal values might change during basic reads. + * Only used if @cpu is NULL + * + * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, +gnet_stats_copy_basic_hw(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b) + struct gnet_stats_basic_sync *b, + bool running) { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC_HW); + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); } EXPORT_SYMBOL(gnet_stats_copy_basic_hw); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 585829ffa0c4..3258da3d5bed 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, - &p->tcfa_lock, NULL, est); + &p->tcfa_lock, false, est); if (err) goto err4; } @@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (err < 0) goto errout; - if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || - gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, - &p->tcfa_bstats_hw) < 0 || + if (gnet_stats_copy_basic(&d, p->cpu_bstats, + &p->tcfa_bstats, false) < 0 || + gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, + &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index c9383805222d..9e77ba8401e5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, police->common.cpu_bstats, &police->tcf_rate_est, &police->tcf_lock, - NULL, est); + false, est); if (err) goto failure; } else if (tb[TCA_POLICE_AVRATE] && diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 70f006cbf212..efcd0b5e9a32 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -943,8 +943,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, cpu_qstats = q->cpu_qstats; } - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), - &d, cpu_bstats, &q->bstats) < 0 || + if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; @@ -1265,26 +1264,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { - seqcount_t *running; - err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); goto err_out4; } - if (sch->parent != TC_H_ROOT && - !(sch->flags & TCQ_F_INGRESS) && - (!p || !(p->flags & TCQ_F_MQROOT))) - running = qdisc_root_sleeping_running(sch); - else - running = &sch->running; - err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, - running, + true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); @@ -1360,7 +1350,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, sch->cpu_bstats, &sch->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); } out: diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fbfe4ce9497b..4c8e994cf0a5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -653,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct atm_flow_data *flow = (struct atm_flow_data *)arg; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &flow->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f0b1282fae11..02d9f0dfe356 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1383,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; @@ -1518,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); @@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); tcf_block_put(cl->block); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7243617a3595..18e4f7a0b291 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); @@ -119,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); qdisc_put(cl->qdisc); @@ -268,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (qlen) xstats.deficit = cl->deficit; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index af56d155e7fc..0eae9ff5edf6 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg, struct ets_class *cl = ets_class_from_arg(sch, arg); struct Qdisc *cl_q = cl->qdisc; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 989186e7f1a0..b0ff0dff2773 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -304,8 +304,8 @@ trace: /* * Transmit possibly several skbs, and handle the return status as - * required. Owning running seqcount bit guarantees that - * only one CPU can execute this function. + * required. Owning qdisc running bit guarantees that only one CPU + * can execute this function. * * Returns to the caller: * false - hardware queue frozen backoff @@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, - .running = SEQCNT_ZERO(noop_qdisc.running), .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, @@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; -static struct lock_class_key qdisc_running_key; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, @@ -917,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - seqcount_init(&sch->running); - lockdep_set_class(&sch->running, - dev->qdisc_running_key ?: &qdisc_running_key); - sch->ops = ops; sch->flags = ops->static_flags; sch->enqueue = ops->enqueue; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 181c2905ff98..d3979a6000e7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; @@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { tcf_block_put(cl->block); kfree(cl); @@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.work = cl->cl_total; xstats.rtwork = cl->cl_cumul; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adceb9e210f6..cf1d45db4e84 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1368,8 +1368,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) } } - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; @@ -1865,7 +1864,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE] ? : &est.nla); if (err) goto err_block_put; @@ -1991,7 +1990,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index cedd0b3ef9cf..83d2e54bf303 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -144,8 +144,8 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats); sch->q.qlen += qdisc_qlen(qdisc); @@ -231,8 +231,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, - &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3f7f756f92ca..b29f3453c6ea 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -402,8 +402,8 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats); sch->q.qlen += qdisc_qlen(qdisc); @@ -519,8 +519,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_lock_bh(qdisc_lock(qdisc)); - gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats, - &qdisc->bstats); + gnet_stats_add_basic(&bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, &qdisc->qstats); sch->q.qlen += qdisc_qlen(qdisc); @@ -532,15 +532,15 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, /* Reclaim root sleeping lock before completing stats */ if (d->lock) spin_lock_bh(d->lock); - if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 || gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) return -1; } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, - sch->cpu_bstats, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, sch->cpu_bstats, + &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e282e7382117..cd8ab90c4765 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 03fdf31ccb6a..3b8d7197c06b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, + &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index a35200f591a2..0b7f9ba28deb 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; @@ -478,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) goto destroy_class; @@ -640,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b9fd18d98646..9ab068fa2672 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1977,7 +1977,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; From 2841bfd10aa7bc07aecc452c10158a5df10ebd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 15 Oct 2021 08:56:14 +0200 Subject: [PATCH 288/440] net: ks8851: Make ks8851_remove_common() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now ks8851_remove_common() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of platform and spi remove callbacks is ignored anyway. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.h | 2 +- drivers/net/ethernet/micrel/ks8851_common.c | 4 +--- drivers/net/ethernet/micrel/ks8851_par.c | 4 +++- drivers/net/ethernet/micrel/ks8851_spi.c | 4 +++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index e2eb0caeac82..6f34a61739b6 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -427,7 +427,7 @@ struct ks8851_net { int ks8851_probe_common(struct net_device *netdev, struct device *dev, int msg_en); -int ks8851_remove_common(struct device *dev); +void ks8851_remove_common(struct device *dev); int ks8851_suspend(struct device *dev); int ks8851_resume(struct device *dev); diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 2c4e5e602be7..499258b8662a 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -1247,7 +1247,7 @@ err_reg_io: } EXPORT_SYMBOL_GPL(ks8851_probe_common); -int ks8851_remove_common(struct device *dev) +void ks8851_remove_common(struct device *dev) { struct ks8851_net *priv = dev_get_drvdata(dev); @@ -1261,8 +1261,6 @@ int ks8851_remove_common(struct device *dev) gpio_set_value(priv->gpio, 0); regulator_disable(priv->vdd_reg); regulator_disable(priv->vdd_io); - - return 0; } EXPORT_SYMBOL_GPL(ks8851_remove_common); diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 2e8fcce50f9d..2e25798c610e 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -327,7 +327,9 @@ static int ks8851_probe_par(struct platform_device *pdev) static int ks8851_remove_par(struct platform_device *pdev) { - return ks8851_remove_common(&pdev->dev); + ks8851_remove_common(&pdev->dev); + + return 0; } static const struct of_device_id ks8851_match_table[] = { diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 479406ecbaa3..0303e727e99f 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -454,7 +454,9 @@ static int ks8851_probe_spi(struct spi_device *spi) static int ks8851_remove_spi(struct spi_device *spi) { - return ks8851_remove_common(&spi->dev); + ks8851_remove_common(&spi->dev); + + return 0; } static const struct of_device_id ks8851_match_table[] = { From d40dfa0cebd8197aaca2fcac4b9fa61da6e1c9fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 15 Oct 2021 08:56:15 +0200 Subject: [PATCH 289/440] net: w5100: Make w5100_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now w5100_remove() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of platform and spi remove callbacks is ignored anyway. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- drivers/net/ethernet/wiznet/w5100-spi.c | 4 +++- drivers/net/ethernet/wiznet/w5100.c | 7 ++++--- drivers/net/ethernet/wiznet/w5100.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 2b84848dc26a..7779a36da3c8 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -463,7 +463,9 @@ static int w5100_spi_probe(struct spi_device *spi) static int w5100_spi_remove(struct spi_device *spi) { - return w5100_remove(&spi->dev); + w5100_remove(&spi->dev); + + return 0; } static const struct spi_device_id w5100_spi_ids[] = { diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 88fc65ed0017..ae24d6b86803 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1064,7 +1064,9 @@ static int w5100_mmio_probe(struct platform_device *pdev) static int w5100_mmio_remove(struct platform_device *pdev) { - return w5100_remove(&pdev->dev); + w5100_remove(&pdev->dev); + + return 0; } void *w5100_ops_priv(const struct net_device *ndev) @@ -1210,7 +1212,7 @@ err_register: } EXPORT_SYMBOL_GPL(w5100_probe); -int w5100_remove(struct device *dev) +void w5100_remove(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct w5100_priv *priv = netdev_priv(ndev); @@ -1226,7 +1228,6 @@ int w5100_remove(struct device *dev) unregister_netdev(ndev); free_netdev(ndev); - return 0; } EXPORT_SYMBOL_GPL(w5100_remove); diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h index 5d3d4b541fec..481af3b6d9e8 100644 --- a/drivers/net/ethernet/wiznet/w5100.h +++ b/drivers/net/ethernet/wiznet/w5100.h @@ -31,6 +31,6 @@ void *w5100_ops_priv(const struct net_device *ndev); int w5100_probe(struct device *dev, const struct w5100_ops *ops, int sizeof_ops_priv, const void *mac_addr, int irq, int link_gpio); -int w5100_remove(struct device *dev); +void w5100_remove(struct device *dev); extern const struct dev_pm_ops w5100_pm_ops; From 8e60189d937c6cd5446a8eedaa12b727f1b2a33f Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 18 Oct 2021 10:27:36 +0900 Subject: [PATCH 290/440] dt-bindings: net: ave: Add bindings for NX1 SoC Update AVE binding document for UniPhier NX1 SoC. Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- .../devicetree/bindings/net/socionext,uniphier-ave4.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index 8a03a24a2019..6bc61c42418f 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -24,6 +24,7 @@ properties: - socionext,uniphier-ld11-ave4 - socionext,uniphier-ld20-ave4 - socionext,uniphier-pxs3-ave4 + - socionext,uniphier-nx1-ave4 reg: maxItems: 1 From 9fd3d5dced976640f588e0a866b9611db2d2cb37 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 18 Oct 2021 10:27:37 +0900 Subject: [PATCH 291/440] net: ethernet: ave: Add compatible string and SoC-dependent data for NX1 SoC Add basic support for UniPhier NX1 SoC. This includes a compatible string and SoC-dependent data. Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/sni_ave.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 4b0fe0f58bbf..2c48f8b8ab71 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1935,6 +1935,17 @@ static const struct ave_soc_data ave_pxs3_data = { .get_pinmode = ave_pxs3_get_pinmode, }; +static const struct ave_soc_data ave_nx1_data = { + .is_desc_64bit = true, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_pxs3_get_pinmode, +}; + static const struct of_device_id of_ave_match[] = { { .compatible = "socionext,uniphier-pro4-ave4", @@ -1956,6 +1967,10 @@ static const struct of_device_id of_ave_match[] = { .compatible = "socionext,uniphier-pxs3-ave4", .data = &ave_pxs3_data, }, + { + .compatible = "socionext,uniphier-nx1-ave4", + .data = &ave_nx1_data, + }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, of_ave_match); From 0e9e7598c68f1dc17ce1053b08494b8ed6dd4985 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Oct 2021 23:06:01 +0200 Subject: [PATCH 292/440] octeontx2-nic: fix mixed module build Building the VF and PF side of this driver differently, with one being a loadable module and the other one built-in results in a link failure for the common PTP driver: ld.lld: error: undefined symbol: __this_module >>> referenced by otx2_ptp.c >>> net/ethernet/marvell/octeontx2/nic/otx2_ptp.o:(otx2_ptp_init) in archive drivers/built-in.a >>> referenced by otx2_ptp.c >>> net/ethernet/marvell/octeontx2/nic/otx2_ptp.o:(otx2_ptp_init) in archive drivers/built-in.a Move the otx2_ptp.c code into a separate module that gets built for both configurations, making it built-in if at least one of the other two is built-in. Fixes: 43510ef4ddad ("octeontx2-nicvf: Add PTP hardware clock support to NIX VF") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/Makefile | 8 ++++---- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index aaf9accc40ed..0048b5946712 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -3,12 +3,12 @@ # Makefile for Marvell's RVU Ethernet device drivers # -obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o -obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o +obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o otx2_ptp.o +obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ + otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ otx2_devlink.o -rvu_nicvf-y := otx2_vf.o otx2_devlink.o otx2_ptp.o +rvu_nicvf-y := otx2_vf.o otx2_devlink.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 85b1f140d3dd..0ef68fdd1f26 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -297,6 +297,7 @@ int otx2_ptp_init(struct otx2_nic *pfvf) error: return err; } +EXPORT_SYMBOL_GPL(otx2_ptp_init); void otx2_ptp_destroy(struct otx2_nic *pfvf) { @@ -309,6 +310,7 @@ void otx2_ptp_destroy(struct otx2_nic *pfvf) kfree(ptp); pfvf->ptp = NULL; } +EXPORT_SYMBOL_GPL(otx2_ptp_destroy); int otx2_ptp_clock_index(struct otx2_nic *pfvf) { @@ -317,6 +319,7 @@ int otx2_ptp_clock_index(struct otx2_nic *pfvf) return ptp_clock_index(pfvf->ptp->ptp_clock); } +EXPORT_SYMBOL_GPL(otx2_ptp_clock_index); int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns) { @@ -327,3 +330,8 @@ int otx2_ptp_tstamp2time(struct otx2_nic *pfvf, u64 tstamp, u64 *tsns) return 0; } +EXPORT_SYMBOL_GPL(otx2_ptp_tstamp2time); + +MODULE_AUTHOR("Sunil Goutham "); +MODULE_DESCRIPTION("Marvell RVU NIC PTP Driver"); +MODULE_LICENSE("GPL v2"); From 4abd7cffc09a38015fa14a22471e3de48a4ca032 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Oct 2021 14:53:04 -0700 Subject: [PATCH 293/440] ethernet: use eth_hw_addr_set() in unmaintained drivers Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/calxeda/xgmac.c | 4 +++- drivers/net/ethernet/cirrus/cs89x0.c | 11 +++++++---- drivers/net/ethernet/davicom/dm9000.c | 7 +++++-- drivers/net/ethernet/ethoc.c | 8 ++++++-- drivers/net/ethernet/fealnx.c | 4 +++- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 7 +++++-- drivers/net/ethernet/huawei/hinic/hinic_main.c | 4 +++- drivers/net/ethernet/marvell/pxa168_eth.c | 8 ++++++-- drivers/net/ethernet/micrel/ks8842.c | 8 +++++--- drivers/net/ethernet/micrel/ks8851_common.c | 6 ++++-- drivers/net/ethernet/micrel/ksz884x.c | 7 +++++-- drivers/net/ethernet/microchip/encx24j600.c | 4 +++- drivers/net/ethernet/natsemi/natsemi.c | 6 ++++-- drivers/net/ethernet/natsemi/ns83820.c | 11 +++++++---- drivers/net/ethernet/packetengines/hamachi.c | 5 +++-- drivers/net/ethernet/packetengines/yellowfin.c | 6 ++++-- drivers/net/ethernet/silan/sc92031.c | 14 ++++++++------ drivers/net/ethernet/smsc/smc91c92_cs.c | 10 +++++++--- drivers/net/ethernet/sun/sunvnet.c | 4 +++- drivers/net/ethernet/toshiba/tc35815.c | 6 ++++-- drivers/net/ethernet/xircom/xirc2ps_cs.c | 12 ++++-------- 21 files changed, 99 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 9ad89a53c3e6..457cb7121000 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1693,6 +1693,7 @@ static int xgmac_probe(struct platform_device *pdev) struct resource *res; struct net_device *ndev = NULL; struct xgmac_priv *priv = NULL; + u8 addr[ETH_ALEN]; u32 uid; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1785,7 +1786,8 @@ static int xgmac_probe(struct platform_device *pdev) ndev->max_mtu = XGMAC_MAX_MTU; /* Get the MAC address */ - xgmac_get_mac_addr(priv->base, ndev->dev_addr, 0); + xgmac_get_mac_addr(priv->base, addr, 0); + eth_hw_addr_set(ndev, addr); if (!is_valid_ether_addr(ndev->dev_addr)) netdev_warn(ndev, "MAC address %pM not valid", ndev->dev_addr); diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index bd7920ab166f..4a97aa8e1387 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1314,6 +1314,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) int tmp; unsigned rev_type = 0; int eeprom_buff[CHKSUM_LEN]; + u8 addr[ETH_ALEN]; int retval; /* Initialize the device structure. */ @@ -1387,9 +1388,10 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) for (i = 0; i < ETH_ALEN / 2; i++) { unsigned int Addr; Addr = readreg(dev, PP_IA + i * 2); - dev->dev_addr[i * 2] = Addr & 0xFF; - dev->dev_addr[i * 2 + 1] = Addr >> 8; + addr[i * 2] = Addr & 0xFF; + addr[i * 2 + 1] = Addr >> 8; } + eth_hw_addr_set(dev, addr); /* Load the Adapter Configuration. * Note: Barring any more specific information from some @@ -1464,9 +1466,10 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) /* eeprom_buff has 32-bit ints, so we can't just memcpy it */ /* store the initial memory base address */ for (i = 0; i < ETH_ALEN / 2; i++) { - dev->dev_addr[i * 2] = eeprom_buff[i]; - dev->dev_addr[i * 2 + 1] = eeprom_buff[i] >> 8; + addr[i * 2] = eeprom_buff[i]; + addr[i * 2 + 1] = eeprom_buff[i] >> 8; } + eth_hw_addr_set(dev, addr); cs89_dbg(1, debug, "%s: new adapter_cnf: 0x%x\n", dev->name, lp->adapter_cnf); } diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index e13dd53a8b3b..0985ab216566 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1425,6 +1425,7 @@ dm9000_probe(struct platform_device *pdev) enum of_gpio_flags flags; struct regulator *power; bool inv_mac_addr = false; + u8 addr[ETH_ALEN]; power = devm_regulator_get(dev, "vcc"); if (IS_ERR(power)) { @@ -1666,7 +1667,8 @@ dm9000_probe(struct platform_device *pdev) /* try reading the node address from the attached EEPROM */ for (i = 0; i < 6; i += 2) - dm9000_read_eeprom(db, i / 2, ndev->dev_addr+i); + dm9000_read_eeprom(db, i / 2, addr + i); + eth_hw_addr_set(ndev, addr); if (!is_valid_ether_addr(ndev->dev_addr) && pdata != NULL) { mac_src = "platform data"; @@ -1678,7 +1680,8 @@ dm9000_probe(struct platform_device *pdev) mac_src = "chip"; for (i = 0; i < 6; i++) - ndev->dev_addr[i] = ior(db, i+DM9000_PAR); + addr[i] = ior(db, i + DM9000_PAR); + eth_hw_addr_set(ndev, pdata->dev_addr); } if (!is_valid_ether_addr(ndev->dev_addr)) { diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ed2ef167cdb2..b1c8ffea6ad2 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1154,8 +1154,12 @@ static int ethoc_probe(struct platform_device *pdev) /* Check that the given MAC address is valid. If it isn't, read the * current MAC from the controller. */ - if (!is_valid_ether_addr(netdev->dev_addr)) - ethoc_get_mac_address(netdev, netdev->dev_addr); + if (!is_valid_ether_addr(netdev->dev_addr)) { + u8 addr[ETH_ALEN]; + + ethoc_get_mac_address(netdev, addr); + eth_hw_addr_set(netdev, addr); + } /* Check the MAC again for validity, if it still isn't choose and * program a random one. diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index ab194c9b0691..b3939a5f7b03 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -482,6 +482,7 @@ static int fealnx_init_one(struct pci_dev *pdev, struct net_device *dev; void *ring_space; dma_addr_t ring_dma; + u8 addr[ETH_ALEN]; #ifdef USE_IO_OPS int bar = 0; #else @@ -525,7 +526,8 @@ static int fealnx_init_one(struct pci_dev *pdev, /* read ethernet id */ for (i = 0; i < 6; ++i) - dev->dev_addr[i] = ioread8(ioaddr + PAR0 + i); + addr[i] = ioread8(ioaddr + PAR0 + i); + eth_hw_addr_set(dev, addr); /* Reset the chip to erase previous misconfiguration. */ iowrite32(0x00000001, ioaddr + BCR); diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 62600153b964..b0d733e9a7c6 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -334,6 +334,7 @@ static int fmvj18x_config(struct pcmcia_device *link) u8 *buf; size_t len; u_char buggybuf[32]; + u8 addr[ETH_ALEN]; dev_dbg(&link->dev, "fmvj18x_config\n"); @@ -489,7 +490,8 @@ static int fmvj18x_config(struct pcmcia_device *link) case UNGERMANN: /* Read MACID from register */ for (i = 0; i < 6; i++) - dev->dev_addr[i] = inb(ioaddr + UNGERMANN_MAC_ID + i); + addr[i] = inb(ioaddr + UNGERMANN_MAC_ID + i); + eth_hw_addr_set(dev, addr); card_name = "Access/CARD"; break; case XXX10304: @@ -505,7 +507,8 @@ static int fmvj18x_config(struct pcmcia_device *link) default: /* Read MACID from register */ for (i = 0; i < 6; i++) - dev->dev_addr[i] = inb(ioaddr + MAC_ID + i); + addr[i] = inb(ioaddr + MAC_ID + i); + eth_hw_addr_set(dev, addr); card_name = "FMV-J181"; break; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 6414e922cf8c..f9a766b8ac43 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1181,6 +1181,7 @@ static int nic_dev_init(struct pci_dev *pdev) struct net_device *netdev; struct hinic_hwdev *hwdev; struct devlink *devlink; + u8 addr[ETH_ALEN]; int err, num_qps; devlink = hinic_devlink_alloc(&pdev->dev); @@ -1259,11 +1260,12 @@ static int nic_dev_init(struct pci_dev *pdev) pci_set_drvdata(pdev, netdev); - err = hinic_port_get_mac(nic_dev, netdev->dev_addr); + err = hinic_port_get_mac(nic_dev, addr); if (err) { dev_err(&pdev->dev, "Failed to get mac address\n"); goto err_get_mac; } + eth_hw_addr_set(netdev, addr); if (!is_valid_ether_addr(netdev->dev_addr)) { if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) { diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index bb5341020803..6c02e1740609 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1436,9 +1436,13 @@ static int pxa168_eth_probe(struct platform_device *pdev) err = of_get_ethdev_address(pdev->dev.of_node, dev); if (err) { + u8 addr[ETH_ALEN]; + /* try reading the mac address, if set by the bootloader */ - pxa168_eth_get_mac_address(dev, dev->dev_addr); - if (!is_valid_ether_addr(dev->dev_addr)) { + pxa168_eth_get_mac_address(dev, addr); + if (is_valid_ether_addr(addr)) { + eth_hw_addr_set(dev, addr); + } else { dev_info(&pdev->dev, "Using random mac address\n"); eth_hw_addr_random(dev); } diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index 0f2cdcd4a4c0..c11b118dc415 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -348,13 +348,15 @@ static void ks8842_reset_hw(struct ks8842_adapter *adapter) ks8842_write16(adapter, 32, 0x1, REG_SW_ID_AND_ENABLE); } -static void ks8842_read_mac_addr(struct ks8842_adapter *adapter, u8 *dest) +static void ks8842_init_mac_addr(struct ks8842_adapter *adapter) { + u8 addr[ETH_ALEN]; int i; u16 mac; for (i = 0; i < ETH_ALEN; i++) - dest[ETH_ALEN - i - 1] = ks8842_read8(adapter, 2, REG_MARL + i); + addr[ETH_ALEN - i - 1] = ks8842_read8(adapter, 2, REG_MARL + i); + eth_hw_addr_set(adapter->netdev, addr); if (adapter->conf_flags & MICREL_KS884X) { /* @@ -1195,7 +1197,7 @@ static int ks8842_probe(struct platform_device *pdev) } if (i == netdev->addr_len) { - ks8842_read_mac_addr(adapter, netdev->dev_addr); + ks8842_init_mac_addr(adapter); if (!is_valid_ether_addr(netdev->dev_addr)) eth_hw_addr_random(netdev); diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 499258b8662a..691206f19ea7 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -165,6 +165,7 @@ static void ks8851_read_mac_addr(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); unsigned long flags; + u8 addr[ETH_ALEN]; u16 reg; int i; @@ -172,9 +173,10 @@ static void ks8851_read_mac_addr(struct net_device *dev) for (i = 0; i < ETH_ALEN; i += 2) { reg = ks8851_rdreg16(ks, KS_MAR(i)); - dev->dev_addr[i] = reg >> 8; - dev->dev_addr[i + 1] = reg & 0xff; + addr[i] = reg >> 8; + addr[i + 1] = reg & 0xff; } + eth_hw_addr_set(dev, addr); ks8851_unlock(ks, &flags); } diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 03ad8bdc1f0d..99c0c1491af2 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -7007,9 +7007,12 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) if (MAIN_PORT == i) eth_hw_addr_set(dev, hw_priv->hw.override_addr); else { - eth_hw_addr_set(dev, sw->other_addr); + u8 addr[ETH_ALEN]; + + ether_addr_copy(addr, sw->other_addr); if (ether_addr_equal(sw->other_addr, hw->override_addr)) - dev->dev_addr[5] += port->first_port; + addr[5] += port->first_port; + eth_hw_addr_set(dev, addr); } dev->netdev_ops = &netdev_ops; diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 79167c3a3179..b90efc80fb59 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1001,6 +1001,7 @@ static int encx24j600_spi_probe(struct spi_device *spi) struct net_device *ndev; struct encx24j600_priv *priv; u16 eidled; + u8 addr[ETH_ALEN]; ndev = alloc_etherdev(sizeof(struct encx24j600_priv)); @@ -1056,7 +1057,8 @@ static int encx24j600_spi_probe(struct spi_device *spi) } /* Get the MAC address from the chip */ - encx24j600_hw_get_macaddr(priv, ndev->dev_addr); + encx24j600_hw_get_macaddr(priv, addr); + eth_hw_addr_set(ndev, addr); ndev->ethtool_ops = &encx24j600_ethtool_ops; diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 3f982033944b..82a22711ce45 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -809,6 +809,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) unsigned long iosize; void __iomem *ioaddr; const int pcibar = 1; /* PCI base address register */ + u8 addr[ETH_ALEN]; int prev_eedata; u32 tmp; @@ -859,10 +860,11 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) prev_eedata = eeprom_read(ioaddr, 6); for (i = 0; i < 3; i++) { int eedata = eeprom_read(ioaddr, i + 7); - dev->dev_addr[i*2] = (eedata << 1) + (prev_eedata >> 15); - dev->dev_addr[i*2+1] = eedata >> 7; + addr[i*2] = (eedata << 1) + (prev_eedata >> 15); + addr[i*2+1] = eedata >> 7; prev_eedata = eedata; } + eth_hw_addr_set(dev, addr); np = netdev_priv(dev); np->ioaddr = ioaddr; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 72794d158871..49ea130c9067 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1649,9 +1649,11 @@ failed: return ret; } -static void ns83820_getmac(struct ns83820 *dev, u8 *mac) +static void ns83820_getmac(struct ns83820 *dev, struct net_device *ndev) { + u8 mac[ETH_ALEN]; unsigned i; + for (i=0; i<3; i++) { u32 data; @@ -1661,9 +1663,10 @@ static void ns83820_getmac(struct ns83820 *dev, u8 *mac) writel(i*2, dev->base + RFCR); data = readl(dev->base + RFDR); - *mac++ = data; - *mac++ = data >> 8; + mac[i * 2] = data; + mac[i * 2 + 1] = data >> 8; } + eth_hw_addr_set(ndev, mac); } static void ns83820_set_multicast(struct net_device *ndev) @@ -2136,7 +2139,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, /* Disable Wake On Lan */ writel(0, dev->base + WCSR); - ns83820_getmac(dev, ndev->dev_addr); + ns83820_getmac(dev, ndev); /* Yes, we support dumb IP checksum on transmit */ ndev->features |= NETIF_F_SG; diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 1a6336a56d3d..9c408328be0d 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -592,6 +592,7 @@ static int hamachi_init_one(struct pci_dev *pdev, void *ring_space; dma_addr_t ring_dma; int ret = -ENOMEM; + u8 addr[ETH_ALEN]; /* when built into the kernel, we only print version if device is found */ #ifndef MODULE @@ -628,8 +629,8 @@ static int hamachi_init_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); for (i = 0; i < 6; i++) - dev->dev_addr[i] = 1 ? read_eeprom(ioaddr, 4 + i) - : readb(ioaddr + StationAddr + i); + addr[i] = read_eeprom(ioaddr, 4 + i); + eth_hw_addr_set(dev, addr); #if ! defined(final_version) if (hamachi_debug > 4) diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index f5cd8f51be7c..12105f62cbdd 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -384,6 +384,7 @@ static int yellowfin_init_one(struct pci_dev *pdev, #else int bar = 1; #endif + u8 addr[ETH_ALEN]; /* when built into the kernel, we only print version if device is found */ #ifndef MODULE @@ -416,12 +417,13 @@ static int yellowfin_init_one(struct pci_dev *pdev, if (drv_flags & DontUseEeprom) for (i = 0; i < 6; i++) - dev->dev_addr[i] = ioread8(ioaddr + StnAddr + i); + addr[i] = ioread8(ioaddr + StnAddr + i); else { int ee_offset = (read_eeprom(ioaddr, 6) == 0xff ? 0x100 : 0); for (i = 0; i < 6; i++) - dev->dev_addr[i] = read_eeprom(ioaddr, ee_offset + i); + addr[i] = read_eeprom(ioaddr, ee_offset + i); } + eth_hw_addr_set(dev, addr); /* Reset the chip. */ iowrite32(0x80000000, ioaddr + DMACtrl); diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index 1fd08a04bd4e..ff4197f5e46d 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1400,6 +1400,7 @@ static int sc92031_probe(struct pci_dev *pdev, const struct pci_device_id *id) void __iomem* port_base; struct net_device *dev; struct sc92031_priv *priv; + u8 addr[ETH_ALEN]; u32 mac0, mac1; err = pci_enable_device(pdev); @@ -1458,12 +1459,13 @@ static int sc92031_probe(struct pci_dev *pdev, const struct pci_device_id *id) mac0 = ioread32(port_base + MAC0); mac1 = ioread32(port_base + MAC0 + 4); - dev->dev_addr[0] = mac0 >> 24; - dev->dev_addr[1] = mac0 >> 16; - dev->dev_addr[2] = mac0 >> 8; - dev->dev_addr[3] = mac0; - dev->dev_addr[4] = mac1 >> 8; - dev->dev_addr[5] = mac1; + addr[0] = mac0 >> 24; + addr[1] = mac0 >> 16; + addr[2] = mac0 >> 8; + addr[3] = mac0; + addr[4] = mac1 >> 8; + addr[5] = mac1; + eth_hw_addr_set(dev, addr); err = register_netdev(dev); if (err < 0) diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index e5658aa39765..37c822e27207 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -347,6 +347,7 @@ static void smc91c92_detach(struct pcmcia_device *link) static int cvt_ascii_address(struct net_device *dev, char *s) { + u8 mac[ETH_ALEN]; int i, j, da, c; if (strlen(s) != 12) @@ -359,8 +360,9 @@ static int cvt_ascii_address(struct net_device *dev, char *s) da += ((c >= '0') && (c <= '9')) ? (c - '0') : ((c & 0x0f) + 9); } - dev->dev_addr[i] = da; + mac[i] = da; } + eth_hw_addr_set(dev, mac); return 0; } @@ -539,6 +541,7 @@ static int mot_setup(struct pcmcia_device *link) struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; int i, wait, loop; + u8 mac[ETH_ALEN]; u_int addr; /* Read Ethernet address from Serial EEPROM */ @@ -559,9 +562,10 @@ static int mot_setup(struct pcmcia_device *link) return -1; addr = inw(ioaddr + GENERAL); - dev->dev_addr[2*i] = addr & 0xff; - dev->dev_addr[2*i+1] = (addr >> 8) & 0xff; + mac[2*i] = addr & 0xff; + mac[2*i+1] = (addr >> 8) & 0xff; } + eth_hw_addr_set(dev, mac); return 0; } diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 58ee89223951..da8119625cf3 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -285,6 +285,7 @@ static struct vnet *vnet_new(const u64 *local_mac, struct vio_dev *vdev) { struct net_device *dev; + u8 addr[ETH_ALEN]; struct vnet *vp; int err, i; @@ -295,7 +296,8 @@ static struct vnet *vnet_new(const u64 *local_mac, dev->needed_tailroom = 8; for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff; + addr[i] = (*local_mac >> (5 - i) * 8) & 0xff; + eth_hw_addr_set(dev, addr); vp = netdev_priv(dev); diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index f8b9d10dc056..ce38f7515225 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -725,6 +725,7 @@ static int tc35815_init_dev_addr(struct net_device *dev) { struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; + u8 addr[ETH_ALEN]; int i; while (tc_readl(&tr->PROM_Ctl) & PROM_Busy) @@ -735,9 +736,10 @@ static int tc35815_init_dev_addr(struct net_device *dev) while (tc_readl(&tr->PROM_Ctl) & PROM_Busy) ; data = tc_readl(&tr->PROM_Data); - dev->dev_addr[i] = data & 0xff; - dev->dev_addr[i+1] = data >> 8; + addr[i] = data & 0xff; + addr[i+1] = data >> 8; } + eth_hw_addr_set(dev, addr); if (!is_valid_ether_addr(dev->dev_addr)) return tc35815_read_plat_dev_addr(dev); return 0; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index ab513dcc3b22..f9587e55b842 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -671,7 +671,6 @@ static int pcmcia_get_mac_ce(struct pcmcia_device *p_dev, void *priv) { struct net_device *dev = priv; - int i; if (tuple->TupleDataLen != 13) return -EINVAL; @@ -679,8 +678,7 @@ static int pcmcia_get_mac_ce(struct pcmcia_device *p_dev, (tuple->TupleData[2] != 6)) return -EINVAL; /* another try (James Lehmer's CE2 version 4.1)*/ - for (i = 2; i < 6; i++) - dev->dev_addr[i] = tuple->TupleData[i+2]; + dev_addr_mod(dev, 2, &tuple->TupleData[2], 4); return 0; }; @@ -742,11 +740,9 @@ xirc2ps_config(struct pcmcia_device * link) len = pcmcia_get_tuple(link, 0x89, &buf); /* data layout looks like tuple 0x22 */ if (buf && len == 8) { - if (*buf == CISTPL_FUNCE_LAN_NODE_ID) { - int i; - for (i = 2; i < 6; i++) - dev->dev_addr[i] = buf[i+2]; - } else + if (*buf == CISTPL_FUNCE_LAN_NODE_ID) + dev_addr_mod(dev, 2, &buf[2], 4); + else err = -1; } kfree(buf); From 7bbbbfaa7a1b0b03890f25fba5f28bb8c7ef145a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:37:56 +0200 Subject: [PATCH 294/440] ether: add EtherType for proprietary Realtek protocols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new EtherType ETH_P_REALTEK to the if_ether.h uapi header. The EtherType 0x8899 is used in a number of different protocols from Realtek Semiconductor Corp [1], so no general assumptions should be made when trying to decode such packets. Observed protocols include: 0x1 - Realtek Remote Control protocol [2] 0x2 - Echo protocol [2] 0x3 - Loop detection protocol [2] 0x4 - RTL8365MB 4- and 8-byte switch CPU tag protocols [3] 0x9 - RTL8306 switch CPU tag protocol [4] 0xA - RTL8366RB switch CPU tag protocol [4] [1] https://lore.kernel.org/netdev/CACRpkdYQthFgjwVzHyK3DeYUOdcYyWmdjDPG=Rf9B3VrJ12Rzg@mail.gmail.com/ [2] https://www.wireshark.org/lists/ethereal-dev/200409/msg00090.html [3] https://lore.kernel.org/netdev/20210822193145.1312668-4-alvin@pqrs.dk/ [4] https://lore.kernel.org/netdev/20200708122537.1341307-2-linus.walleij@linaro.org/ Suggested-by: Andrew Lunn Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5f589c7a8382..5da4ee234e0b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,6 +86,7 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ From 487d3855b641b22a5875166de914c7253b63368b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:37:57 +0200 Subject: [PATCH 295/440] net: dsa: allow reporting of standard ethtool stats for slave devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jakub pointed out that we have a new ethtool API for reporting device statistics in a standardized way, via .get_eth_{phy,mac,ctrl}_stats. Add a small amount of plumbing to allow DSA drivers to take advantage of this when exposing statistics. Suggested-by: Jakub Kicinski Signed-off-by: Alvin Šipraga Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++++ net/dsa/slave.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index d784e76113b8..8fefd58ced8f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -645,6 +645,12 @@ struct dsa_switch_ops { int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); + void (*get_eth_phy_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_phy_stats *phy_stats); + void (*get_eth_mac_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats); + void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); void (*self_test)(struct dsa_switch *ds, int port, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 11ec9e689589..499f8d18c76d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -789,6 +789,37 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } +static void dsa_slave_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_phy_stats) + ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); +} + +static void dsa_slave_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_mac_stats) + ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); +} + +static void +dsa_slave_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_ctrl_stats) + ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); +} + static void dsa_slave_net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { @@ -1695,6 +1726,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_strings = dsa_slave_get_strings, .get_ethtool_stats = dsa_slave_get_ethtool_stats, .get_sset_count = dsa_slave_get_sset_count, + .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, + .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, + .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, .set_wol = dsa_slave_set_wol, .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, From 9cb8edda2157afcd70dd90eb601b4cbc786c2e70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:37:58 +0200 Subject: [PATCH 296/440] net: dsa: move NET_DSA_TAG_RTL4_A to right place in Kconfig/Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move things around a little so that this tag driver is alphabetically ordered. The Kconfig file is sorted based on the tristate text. Suggested-by: Andrew Lunn Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/Kconfig | 14 +++++++------- net/dsa/Makefile | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index d8ee15f1c7a9..3a09784dae63 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -92,13 +92,6 @@ config NET_DSA_TAG_KSZ Say Y if you want to enable support for tagging frames for the Microchip 8795/9477/9893 families of switches. -config NET_DSA_TAG_RTL4_A - tristate "Tag driver for Realtek 4 byte protocol A tags" - help - Say Y or M if you want to enable support for tagging frames for the - Realtek switches with 4 byte protocol A tags, sich as found in - the Realtek RTL8366RB. - config NET_DSA_TAG_OCELOT tristate "Tag driver for Ocelot family of switches, using NPI port" select PACKING @@ -126,6 +119,13 @@ config NET_DSA_TAG_QCA Say Y or M if you want to enable support for tagging frames for the Qualcomm Atheros QCA8K switches. +config NET_DSA_TAG_RTL4_A + tristate "Tag driver for Realtek 4 byte protocol A tags" + help + Say Y or M if you want to enable support for tagging frames for the + Realtek switches with 4 byte protocol A tags, sich as found in + the Realtek RTL8366RB. + config NET_DSA_TAG_LAN9303 tristate "Tag driver for SMSC/Microchip LAN9303 family of switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 67ea009f242c..f78d537044db 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -10,12 +10,12 @@ obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o -obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o From 2e405875f39ff24aa88a21f90f63e5e18b344e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:37:59 +0200 Subject: [PATCH 297/440] dt-bindings: net: dsa: realtek-smi: document new compatible rtl8365mb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rtl8365mb is a new realtek-smi subdriver for the RTL8365MB-VC 4+1 port 10/100/1000M Ethernet switch controller. Its compatible string is "realtek,rtl8365mb". Signed-off-by: Alvin Šipraga Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Acked-by: Rob Herring Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- .../bindings/net/dsa/realtek-smi.txt | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt b/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt index b6ae8541bd55..7959ec237983 100644 --- a/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt +++ b/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt @@ -9,6 +9,7 @@ SMI-based Realtek devices. Required properties: - compatible: must be exactly one of: + "realtek,rtl8365mb" (4+1 ports) "realtek,rtl8366" "realtek,rtl8366rb" (4+1 ports) "realtek,rtl8366s" (4+1 ports) @@ -62,6 +63,8 @@ and subnodes of DSA switches. Examples: +An example for the RTL8366RB: + switch { compatible = "realtek,rtl8366rb"; /* 22 = MDIO (has input reads), 21 = MDC (clock, output only) */ @@ -151,3 +154,87 @@ switch { }; }; }; + +An example for the RTL8365MB-VC: + +switch { + compatible = "realtek,rtl8365mb"; + mdc-gpios = <&gpio1 16 GPIO_ACTIVE_HIGH>; + mdio-gpios = <&gpio1 17 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; + + switch_intc: interrupt-controller { + interrupt-parent = <&gpio5>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + port@0 { + reg = <0>; + label = "swp0"; + phy-handle = <ðphy0>; + }; + port@1 { + reg = <1>; + label = "swp1"; + phy-handle = <ðphy1>; + }; + port@2 { + reg = <2>; + label = "swp2"; + phy-handle = <ðphy2>; + }; + port@3 { + reg = <3>; + label = "swp3"; + phy-handle = <ðphy3>; + }; + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&fec1>; + phy-mode = "rgmii"; + tx-internal-delay-ps = <2000>; + rx-internal-delay-ps = <2000>; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + + mdio { + compatible = "realtek,smi-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: phy@0 { + reg = <0>; + interrupt-parent = <&switch_intc>; + interrupts = <0>; + }; + ethphy1: phy@1 { + reg = <1>; + interrupt-parent = <&switch_intc>; + interrupts = <1>; + }; + ethphy2: phy@2 { + reg = <2>; + interrupt-parent = <&switch_intc>; + interrupts = <2>; + }; + ethphy3: phy@3 { + reg = <3>; + interrupt-parent = <&switch_intc>; + interrupts = <3>; + }; + }; +}; From 1521d5adfc2b557e15f97283c8b7ad688c3ebc40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:38:00 +0200 Subject: [PATCH 298/440] net: dsa: tag_rtl8_4: add realtek 8 byte protocol 4 tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements a basic version of the 8 byte tag protocol used in the Realtek RTL8365MB-VC unmanaged switch, which carries with it a protocol version of 0x04. The implementation itself only handles the parsing of the EtherType value and Realtek protocol version, together with the source or destination port fields. The rest is left unimplemented for now. The tag format is described in a confidential document provided to my company by Realtek Semiconductor Corp. Permission has been granted by the vendor to publish this driver based on that material, together with an extract from the document describing the tag format and its fields. It is hoped that this will help future implementors who do not have access to the material but who wish to extend the functionality of drivers for chips which use this protocol. In addition, two possible values of the REASON field are specified, based on experiments on my end. Realtek does not specify what value this field can take. Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Tested-by: Arınç ÜNAL Signed-off-by: David S. Miller --- include/net/dsa.h | 2 + net/dsa/Kconfig | 6 ++ net/dsa/Makefile | 1 + net/dsa/tag_rtl8_4.c | 178 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 net/dsa/tag_rtl8_4.c diff --git a/include/net/dsa.h b/include/net/dsa.h index 8fefd58ced8f..05ebdd8d5321 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -51,6 +51,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_SEVILLE_VALUE 21 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 +#define DSA_TAG_PROTO_RTL8_4_VALUE 24 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -77,6 +78,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, + DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, }; struct dsa_switch; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 3a09784dae63..8cb87b5067ee 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -126,6 +126,12 @@ config NET_DSA_TAG_RTL4_A Realtek switches with 4 byte protocol A tags, sich as found in the Realtek RTL8366RB. +config NET_DSA_TAG_RTL8_4 + tristate "Tag driver for Realtek 8 byte protocol 4 tags" + help + Say Y or M if you want to enable support for tagging frames for Realtek + switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC. + config NET_DSA_TAG_LAN9303 tristate "Tag driver for SMSC/Microchip LAN9303 family of switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index f78d537044db..9f75820e7c98 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c new file mode 100644 index 000000000000..02686ad4045d --- /dev/null +++ b/net/dsa/tag_rtl8_4.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handler for Realtek 8 byte switch tags + * + * Copyright (C) 2021 Alvin Šipraga + * + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence + * named tag_rtl8_4. + * + * This tag header has the following format: + * + * ------------------------------------------- + * | MAC DA | MAC SA | 8 byte tag | Type | ... + * ------------------------------------------- + * _______________/ \______________________________________ + * / \ + * 0 7|8 15 + * |-----------------------------------+-----------------------------------|--- + * | (16-bit) | ^ + * | Realtek EtherType [0x8899] | | + * |-----------------------------------+-----------------------------------| 8 + * | (8-bit) | (8-bit) | + * | Protocol [0x04] | REASON | b + * |-----------------------------------+-----------------------------------| y + * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t + * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e + * |-----------------------------------+-----------------------------------| s + * | (1) | (15-bit) | | + * | ALLOW | TX/RX | v + * |-----------------------------------+-----------------------------------|--- + * + * With the following field descriptions: + * + * field | description + * ------------+------------- + * Realtek | 0x8899: indicates that this is a proprietary Realtek tag; + * EtherType | note that Realtek uses the same EtherType for + * | other incompatible tag formats (e.g. tag_rtl4_a.c) + * Protocol | 0x04: indicates that this tag conforms to this format + * X | reserved + * ------------+------------- + * REASON | reason for forwarding packet to CPU + * | 0: packet was forwarded or flooded to CPU + * | 80: packet was trapped to CPU + * FID_EN | 1: packet has an FID + * | 0: no FID + * FID | FID of packet (if FID_EN=1) + * PRI_EN | 1: force priority of packet + * | 0: don't force priority + * PRI | priority of packet (if PRI_EN=1) + * KEEP | preserve packet VLAN tag format + * LEARN_DIS | don't learn the source MAC address of the packet + * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the + * | packet may only be forwarded to ports specified in the + * | mask + * | 0: no allowance port mask, TX/RX field is the forwarding + * | port mask + * TX/RX | TX (switch->CPU): port number the packet was received on + * | RX (CPU->switch): forwarding port mask (if ALLOW=0) + * | allowance port mask (if ALLOW=1) + */ + +#include +#include +#include + +#include "dsa_priv.h" + +/* Protocols supported: + * + * 0x04 = RTL8365MB DSA protocol + */ + +#define RTL8_4_TAG_LEN 8 + +#define RTL8_4_PROTOCOL GENMASK(15, 8) +#define RTL8_4_PROTOCOL_RTL8365MB 0x04 +#define RTL8_4_REASON GENMASK(7, 0) +#define RTL8_4_REASON_FORWARD 0 +#define RTL8_4_REASON_TRAP 80 + +#define RTL8_4_LEARN_DIS BIT(5) + +#define RTL8_4_TX GENMASK(3, 0) +#define RTL8_4_RX GENMASK(10, 0) + +static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *tag; + + skb_push(skb, RTL8_4_TAG_LEN); + + dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN); + tag = dsa_etype_header_pos_tx(skb); + + /* Set Realtek EtherType */ + tag[0] = htons(ETH_P_REALTEK); + + /* Set Protocol; zero REASON */ + tag[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); + + /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */ + tag[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); + + /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ + tag[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index))); + + return skb; +} + +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + __be16 *tag; + u16 etype; + u8 reason; + u8 proto; + u8 port; + + if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN))) + return NULL; + + tag = dsa_etype_header_pos_rx(skb); + + /* Parse Realtek EtherType */ + etype = ntohs(tag[0]); + if (unlikely(etype != ETH_P_REALTEK)) { + dev_warn_ratelimited(&dev->dev, + "non-realtek ethertype 0x%04x\n", etype); + return NULL; + } + + /* Parse Protocol */ + proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag[1])); + if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) { + dev_warn_ratelimited(&dev->dev, + "unknown realtek protocol 0x%02x\n", + proto); + return NULL; + } + + /* Parse REASON */ + reason = FIELD_GET(RTL8_4_REASON, ntohs(tag[1])); + + /* Parse TX (switch->CPU) */ + port = FIELD_GET(RTL8_4_TX, ntohs(tag[3])); + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + dev_warn_ratelimited(&dev->dev, + "could not find slave for port %d\n", + port); + return NULL; + } + + /* Remove tag and recalculate checksum */ + skb_pull_rcsum(skb, RTL8_4_TAG_LEN); + + dsa_strip_etype_header(skb, RTL8_4_TAG_LEN); + + if (reason != RTL8_4_REASON_TRAP) + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops rtl8_4_netdev_ops = { + .name = "rtl8_4", + .proto = DSA_TAG_PROTO_RTL8_4, + .xmit = rtl8_4_tag_xmit, + .rcv = rtl8_4_tag_rcv, + .needed_headroom = RTL8_4_TAG_LEN, +}; +module_dsa_tag_driver(rtl8_4_netdev_ops); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4); From 4af2950c50c8634ed2865cf81e607034f78b84aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:38:01 +0200 Subject: [PATCH 299/440] net: dsa: realtek-smi: add rtl8365mb subdriver for RTL8365MB-VC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a realtek-smi subdriver for the RTL8365MB-VC 4+1 port 10/100/1000M switch controller. The driver has been developed based on a GPL-licensed OS-agnostic Realtek vendor driver known as rtl8367c found in the OpenWrt source tree. Despite the name, the RTL8365MB-VC has an entirely different register layout to the already-supported RTL8366RB ASIC. Notwithstanding this, the structure of the rtl8365mb subdriver is loosely based on the rtl8366rb subdriver. Like the 'rb, it establishes its own irqchip to handle cascaded PHY link status interrupts. The RTL8365MB-VC switch is capable of offloading a large number of features from the software, but this patch introduces only the most basic DSA driver functionality. The ports always function as standalone ports, with bridging handled in software. One more thing. Realtek's nomenclature for switches makes it hard to know exactly what other ASICs might be supported by this driver. The vendor driver goes by the name rtl8367c, but as far as I can tell, no chip actually exists under this name. As such, the subdriver is named rtl8365mb to emphasize the potentially limited support. But it is clear from the vendor sources that a number of other more advanced switches share a similar register layout, and further support should not be too hard to add given access to the relevant hardware. With this in mind, the subdriver has been written with as few assumptions about the particular chip as is reasonable. But the RTL8365MB-VC is the only hardware I have available, so some further work is surely needed. Co-developed-by: Michael Rasmussen Signed-off-by: Michael Rasmussen Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Linus Walleij Reviewed-by: Florian Fainelli Tested-by: Arınç ÜNAL Signed-off-by: David S. Miller --- drivers/net/dsa/Kconfig | 1 + drivers/net/dsa/Makefile | 2 +- drivers/net/dsa/realtek-smi-core.c | 4 + drivers/net/dsa/realtek-smi-core.h | 1 + drivers/net/dsa/rtl8365mb.c | 1982 ++++++++++++++++++++++++++++ 5 files changed, 1989 insertions(+), 1 deletion(-) create mode 100644 drivers/net/dsa/rtl8365mb.c diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index a5f1aa911fe2..7b1457a6e327 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -70,6 +70,7 @@ config NET_DSA_QCA8K config NET_DSA_REALTEK_SMI tristate "Realtek SMI Ethernet switch family support" select NET_DSA_TAG_RTL4_A + select NET_DSA_TAG_RTL8_4 select FIXED_PHY select IRQ_DOMAIN select REALTEK_PHY diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index f3598c040994..8da1569a34e6 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o -realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o +realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c index 2fcfd917b876..c66ebd0ee217 100644 --- a/drivers/net/dsa/realtek-smi-core.c +++ b/drivers/net/dsa/realtek-smi-core.c @@ -501,6 +501,10 @@ static const struct of_device_id realtek_smi_of_match[] = { .compatible = "realtek,rtl8366s", .data = NULL, }, + { + .compatible = "realtek,rtl8365mb", + .data = &rtl8365mb_variant, + }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, realtek_smi_of_match); diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek-smi-core.h index 214f710d7dd5..5bfa53e2480a 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek-smi-core.h @@ -140,5 +140,6 @@ int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset); void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); extern const struct realtek_smi_variant rtl8366rb_variant; +extern const struct realtek_smi_variant rtl8365mb_variant; #endif /* _REALTEK_SMI_H */ diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/rtl8365mb.c new file mode 100644 index 000000000000..baaae97283c5 --- /dev/null +++ b/drivers/net/dsa/rtl8365mb.c @@ -0,0 +1,1982 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Realtek SMI subdriver for the Realtek RTL8365MB-VC ethernet switch. + * + * Copyright (C) 2021 Alvin Šipraga + * Copyright (C) 2021 Michael Rasmussen + * + * The RTL8365MB-VC is a 4+1 port 10/100/1000M switch controller. It includes 4 + * integrated PHYs for the user facing ports, and an extension interface which + * can be connected to the CPU - or another PHY - via either MII, RMII, or + * RGMII. The switch is configured via the Realtek Simple Management Interface + * (SMI), which uses the MDIO/MDC lines. + * + * Below is a simplified block diagram of the chip and its relevant interfaces. + * + * .-----------------------------------. + * | | + * UTP <---------------> Giga PHY <-> PCS <-> P0 GMAC | + * UTP <---------------> Giga PHY <-> PCS <-> P1 GMAC | + * UTP <---------------> Giga PHY <-> PCS <-> P2 GMAC | + * UTP <---------------> Giga PHY <-> PCS <-> P3 GMAC | + * | | + * CPU/PHY <-MII/RMII/RGMII---> Extension <---> Extension | + * | interface 1 GMAC 1 | + * | | + * SMI driver/ <-MDC/SCL---> Management ~~~~~~~~~~~~~~ | + * EEPROM <-MDIO/SDA--> interface ~REALTEK ~~~~~ | + * | ~RTL8365MB ~~~ | + * | ~GXXXC TAIWAN~ | + * GPIO <--------------> Reset ~~~~~~~~~~~~~~ | + * | | + * Interrupt <----------> Link UP/DOWN events | + * controller | | + * '-----------------------------------' + * + * The driver uses DSA to integrate the 4 user and 1 extension ports into the + * kernel. Netdevices are created for the user ports, as are PHY devices for + * their integrated PHYs. The device tree firmware should also specify the link + * partner of the extension port - either via a fixed-link or other phy-handle. + * See the device tree bindings for more detailed information. Note that the + * driver has only been tested with a fixed-link, but in principle it should not + * matter. + * + * NOTE: Currently, only the RGMII interface is implemented in this driver. + * + * The interrupt line is asserted on link UP/DOWN events. The driver creates a + * custom irqchip to handle this interrupt and demultiplex the events by reading + * the status registers via SMI. Interrupts are then propagated to the relevant + * PHY device. + * + * The EEPROM contains initial register values which the chip will read over I2C + * upon hardware reset. It is also possible to omit the EEPROM. In both cases, + * the driver will manually reprogram some registers using jam tables to reach + * an initial state defined by the vendor driver. + * + * This Linux driver is written based on an OS-agnostic vendor driver from + * Realtek. The reference GPL-licensed sources can be found in the OpenWrt + * source tree under the name rtl8367c. The vendor driver claims to support a + * number of similar switch controllers from Realtek, but the only hardware we + * have is the RTL8365MB-VC. Moreover, there does not seem to be any chip under + * the name RTL8367C. Although one wishes that the 'C' stood for some kind of + * common hardware revision, there exist examples of chips with the suffix -VC + * which are explicitly not supported by the rtl8367c driver and which instead + * require the rtl8367d vendor driver. With all this uncertainty, the driver has + * been modestly named rtl8365mb. Future implementors may wish to rename things + * accordingly. + * + * In the same family of chips, some carry up to 8 user ports and up to 2 + * extension ports. Where possible this driver tries to make things generic, but + * more work must be done to support these configurations. According to + * documentation from Realtek, the family should include the following chips: + * + * - RTL8363NB + * - RTL8363NB-VB + * - RTL8363SC + * - RTL8363SC-VB + * - RTL8364NB + * - RTL8364NB-VB + * - RTL8365MB-VC + * - RTL8366SC + * - RTL8367RB-VB + * - RTL8367SB + * - RTL8367S + * - RTL8370MB + * - RTL8310SR + * + * Some of the register logic for these additional chips has been skipped over + * while implementing this driver. It is therefore not possible to assume that + * things will work out-of-the-box for other chips, and a careful review of the + * vendor driver may be needed to expand support. The RTL8365MB-VC seems to be + * one of the simpler chips. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "realtek-smi-core.h" + +/* Chip-specific data and limits */ +#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367 +#define RTL8365MB_CPU_PORT_NUM_8365MB_VC 6 +#define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC 2112 + +/* Family-specific data and limits */ +#define RTL8365MB_NUM_PHYREGS 32 +#define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) +#define RTL8365MB_MAX_NUM_PORTS (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1) + +/* Chip identification registers */ +#define RTL8365MB_CHIP_ID_REG 0x1300 + +#define RTL8365MB_CHIP_VER_REG 0x1301 + +#define RTL8365MB_MAGIC_REG 0x13C2 +#define RTL8365MB_MAGIC_VALUE 0x0249 + +/* Chip reset register */ +#define RTL8365MB_CHIP_RESET_REG 0x1322 +#define RTL8365MB_CHIP_RESET_SW_MASK 0x0002 +#define RTL8365MB_CHIP_RESET_HW_MASK 0x0001 + +/* Interrupt polarity register */ +#define RTL8365MB_INTR_POLARITY_REG 0x1100 +#define RTL8365MB_INTR_POLARITY_MASK 0x0001 +#define RTL8365MB_INTR_POLARITY_HIGH 0 +#define RTL8365MB_INTR_POLARITY_LOW 1 + +/* Interrupt control/status register - enable/check specific interrupt types */ +#define RTL8365MB_INTR_CTRL_REG 0x1101 +#define RTL8365MB_INTR_STATUS_REG 0x1102 +#define RTL8365MB_INTR_SLIENT_START_2_MASK 0x1000 +#define RTL8365MB_INTR_SLIENT_START_MASK 0x0800 +#define RTL8365MB_INTR_ACL_ACTION_MASK 0x0200 +#define RTL8365MB_INTR_CABLE_DIAG_FIN_MASK 0x0100 +#define RTL8365MB_INTR_INTERRUPT_8051_MASK 0x0080 +#define RTL8365MB_INTR_LOOP_DETECTION_MASK 0x0040 +#define RTL8365MB_INTR_GREEN_TIMER_MASK 0x0020 +#define RTL8365MB_INTR_SPECIAL_CONGEST_MASK 0x0010 +#define RTL8365MB_INTR_SPEED_CHANGE_MASK 0x0008 +#define RTL8365MB_INTR_LEARN_OVER_MASK 0x0004 +#define RTL8365MB_INTR_METER_EXCEEDED_MASK 0x0002 +#define RTL8365MB_INTR_LINK_CHANGE_MASK 0x0001 +#define RTL8365MB_INTR_ALL_MASK \ + (RTL8365MB_INTR_SLIENT_START_2_MASK | \ + RTL8365MB_INTR_SLIENT_START_MASK | \ + RTL8365MB_INTR_ACL_ACTION_MASK | \ + RTL8365MB_INTR_CABLE_DIAG_FIN_MASK | \ + RTL8365MB_INTR_INTERRUPT_8051_MASK | \ + RTL8365MB_INTR_LOOP_DETECTION_MASK | \ + RTL8365MB_INTR_GREEN_TIMER_MASK | \ + RTL8365MB_INTR_SPECIAL_CONGEST_MASK | \ + RTL8365MB_INTR_SPEED_CHANGE_MASK | \ + RTL8365MB_INTR_LEARN_OVER_MASK | \ + RTL8365MB_INTR_METER_EXCEEDED_MASK | \ + RTL8365MB_INTR_LINK_CHANGE_MASK) + +/* Per-port interrupt type status registers */ +#define RTL8365MB_PORT_LINKDOWN_IND_REG 0x1106 +#define RTL8365MB_PORT_LINKDOWN_IND_MASK 0x07FF + +#define RTL8365MB_PORT_LINKUP_IND_REG 0x1107 +#define RTL8365MB_PORT_LINKUP_IND_MASK 0x07FF + +/* PHY indirect access registers */ +#define RTL8365MB_INDIRECT_ACCESS_CTRL_REG 0x1F00 +#define RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK 0x0002 +#define RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ 0 +#define RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE 1 +#define RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK 0x0001 +#define RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE 1 +#define RTL8365MB_INDIRECT_ACCESS_STATUS_REG 0x1F01 +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG 0x1F02 +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_5_1_MASK GENMASK(4, 0) +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK GENMASK(6, 5) +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK GENMASK(11, 8) +#define RTL8365MB_PHY_BASE 0x2000 +#define RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG 0x1F03 +#define RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG 0x1F04 + +/* PHY OCP address prefix register */ +#define RTL8365MB_GPHY_OCP_MSB_0_REG 0x1D15 +#define RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK 0x0FC0 +#define RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK 0xFC00 + +/* The PHY OCP addresses of PHY registers 0~31 start here */ +#define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE 0xA400 + +/* EXT port interface mode values - used in DIGITAL_INTERFACE_SELECT */ +#define RTL8365MB_EXT_PORT_MODE_DISABLE 0 +#define RTL8365MB_EXT_PORT_MODE_RGMII 1 +#define RTL8365MB_EXT_PORT_MODE_MII_MAC 2 +#define RTL8365MB_EXT_PORT_MODE_MII_PHY 3 +#define RTL8365MB_EXT_PORT_MODE_TMII_MAC 4 +#define RTL8365MB_EXT_PORT_MODE_TMII_PHY 5 +#define RTL8365MB_EXT_PORT_MODE_GMII 6 +#define RTL8365MB_EXT_PORT_MODE_RMII_MAC 7 +#define RTL8365MB_EXT_PORT_MODE_RMII_PHY 8 +#define RTL8365MB_EXT_PORT_MODE_SGMII 9 +#define RTL8365MB_EXT_PORT_MODE_HSGMII 10 +#define RTL8365MB_EXT_PORT_MODE_1000X_100FX 11 +#define RTL8365MB_EXT_PORT_MODE_1000X 12 +#define RTL8365MB_EXT_PORT_MODE_100FX 13 + +/* EXT port interface mode configuration registers 0~1 */ +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extport) \ + (RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 + \ + ((_extport) >> 1) * (0x13C3 - 0x1305)) +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extport) \ + (0xF << (((_extport) % 2))) +#define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extport) \ + (((_extport) % 2) * 4) + +/* EXT port RGMII TX/RX delay configuration registers 1~2 */ +#define RTL8365MB_EXT_RGMXF_REG1 0x1307 +#define RTL8365MB_EXT_RGMXF_REG2 0x13C5 +#define RTL8365MB_EXT_RGMXF_REG(_extport) \ + (RTL8365MB_EXT_RGMXF_REG1 + \ + (((_extport) >> 1) * (0x13C5 - 0x1307))) +#define RTL8365MB_EXT_RGMXF_RXDELAY_MASK 0x0007 +#define RTL8365MB_EXT_RGMXF_TXDELAY_MASK 0x0008 + +/* External port speed values - used in DIGITAL_INTERFACE_FORCE */ +#define RTL8365MB_PORT_SPEED_10M 0 +#define RTL8365MB_PORT_SPEED_100M 1 +#define RTL8365MB_PORT_SPEED_1000M 2 + +/* EXT port force configuration registers 0~2 */ +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extport) \ + (RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 + \ + ((_extport) & 0x1) + \ + ((((_extport) >> 1) & 0x1) * (0x13C4 - 0x1310))) +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_EN_MASK 0x1000 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_NWAY_MASK 0x0080 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_TXPAUSE_MASK 0x0040 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_RXPAUSE_MASK 0x0020 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_LINK_MASK 0x0010 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_DUPLEX_MASK 0x0004 +#define RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK 0x0003 + +/* CPU port mask register - controls which ports are treated as CPU ports */ +#define RTL8365MB_CPU_PORT_MASK_REG 0x1219 +#define RTL8365MB_CPU_PORT_MASK_MASK 0x07FF + +/* CPU control register */ +#define RTL8365MB_CPU_CTRL_REG 0x121A +#define RTL8365MB_CPU_CTRL_TRAP_PORT_EXT_MASK 0x0400 +#define RTL8365MB_CPU_CTRL_TAG_FORMAT_MASK 0x0200 +#define RTL8365MB_CPU_CTRL_RXBYTECOUNT_MASK 0x0080 +#define RTL8365MB_CPU_CTRL_TAG_POSITION_MASK 0x0040 +#define RTL8365MB_CPU_CTRL_TRAP_PORT_MASK 0x0038 +#define RTL8365MB_CPU_CTRL_INSERTMODE_MASK 0x0006 +#define RTL8365MB_CPU_CTRL_EN_MASK 0x0001 + +/* Maximum packet length register */ +#define RTL8365MB_CFG0_MAX_LEN_REG 0x088C +#define RTL8365MB_CFG0_MAX_LEN_MASK 0x3FFF + +/* Port learning limit registers */ +#define RTL8365MB_LUT_PORT_LEARN_LIMIT_BASE 0x0A20 +#define RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(_physport) \ + (RTL8365MB_LUT_PORT_LEARN_LIMIT_BASE + (_physport)) + +/* Port isolation (forwarding mask) registers */ +#define RTL8365MB_PORT_ISOLATION_REG_BASE 0x08A2 +#define RTL8365MB_PORT_ISOLATION_REG(_physport) \ + (RTL8365MB_PORT_ISOLATION_REG_BASE + (_physport)) +#define RTL8365MB_PORT_ISOLATION_MASK 0x07FF + +/* MSTP port state registers - indexed by tree instancrSTI (tree ine */ +#define RTL8365MB_MSTI_CTRL_BASE 0x0A00 +#define RTL8365MB_MSTI_CTRL_REG(_msti, _physport) \ + (RTL8365MB_MSTI_CTRL_BASE + ((_msti) << 1) + ((_physport) >> 3)) +#define RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET(_physport) ((_physport) << 1) +#define RTL8365MB_MSTI_CTRL_PORT_STATE_MASK(_physport) \ + (0x3 << RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET((_physport))) + +/* MIB counter value registers */ +#define RTL8365MB_MIB_COUNTER_BASE 0x1000 +#define RTL8365MB_MIB_COUNTER_REG(_x) (RTL8365MB_MIB_COUNTER_BASE + (_x)) + +/* MIB counter address register */ +#define RTL8365MB_MIB_ADDRESS_REG 0x1004 +#define RTL8365MB_MIB_ADDRESS_PORT_OFFSET 0x007C +#define RTL8365MB_MIB_ADDRESS(_p, _x) \ + (((RTL8365MB_MIB_ADDRESS_PORT_OFFSET) * (_p) + (_x)) >> 2) + +#define RTL8365MB_MIB_CTRL0_REG 0x1005 +#define RTL8365MB_MIB_CTRL0_RESET_MASK 0x0002 +#define RTL8365MB_MIB_CTRL0_BUSY_MASK 0x0001 + +/* The DSA callback .get_stats64 runs in atomic context, so we are not allowed + * to block. On the other hand, accessing MIB counters absolutely requires us to + * block. The solution is thus to schedule work which polls the MIB counters + * asynchronously and updates some private data, which the callback can then + * fetch atomically. Three seconds should be a good enough polling interval. + */ +#define RTL8365MB_STATS_INTERVAL_JIFFIES (3 * HZ) + +enum rtl8365mb_mib_counter_index { + RTL8365MB_MIB_ifInOctets, + RTL8365MB_MIB_dot3StatsFCSErrors, + RTL8365MB_MIB_dot3StatsSymbolErrors, + RTL8365MB_MIB_dot3InPauseFrames, + RTL8365MB_MIB_dot3ControlInUnknownOpcodes, + RTL8365MB_MIB_etherStatsFragments, + RTL8365MB_MIB_etherStatsJabbers, + RTL8365MB_MIB_ifInUcastPkts, + RTL8365MB_MIB_etherStatsDropEvents, + RTL8365MB_MIB_ifInMulticastPkts, + RTL8365MB_MIB_ifInBroadcastPkts, + RTL8365MB_MIB_inMldChecksumError, + RTL8365MB_MIB_inIgmpChecksumError, + RTL8365MB_MIB_inMldSpecificQuery, + RTL8365MB_MIB_inMldGeneralQuery, + RTL8365MB_MIB_inIgmpSpecificQuery, + RTL8365MB_MIB_inIgmpGeneralQuery, + RTL8365MB_MIB_inMldLeaves, + RTL8365MB_MIB_inIgmpLeaves, + RTL8365MB_MIB_etherStatsOctets, + RTL8365MB_MIB_etherStatsUnderSizePkts, + RTL8365MB_MIB_etherOversizeStats, + RTL8365MB_MIB_etherStatsPkts64Octets, + RTL8365MB_MIB_etherStatsPkts65to127Octets, + RTL8365MB_MIB_etherStatsPkts128to255Octets, + RTL8365MB_MIB_etherStatsPkts256to511Octets, + RTL8365MB_MIB_etherStatsPkts512to1023Octets, + RTL8365MB_MIB_etherStatsPkts1024to1518Octets, + RTL8365MB_MIB_ifOutOctets, + RTL8365MB_MIB_dot3StatsSingleCollisionFrames, + RTL8365MB_MIB_dot3StatsMultipleCollisionFrames, + RTL8365MB_MIB_dot3StatsDeferredTransmissions, + RTL8365MB_MIB_dot3StatsLateCollisions, + RTL8365MB_MIB_etherStatsCollisions, + RTL8365MB_MIB_dot3StatsExcessiveCollisions, + RTL8365MB_MIB_dot3OutPauseFrames, + RTL8365MB_MIB_ifOutDiscards, + RTL8365MB_MIB_dot1dTpPortInDiscards, + RTL8365MB_MIB_ifOutUcastPkts, + RTL8365MB_MIB_ifOutMulticastPkts, + RTL8365MB_MIB_ifOutBroadcastPkts, + RTL8365MB_MIB_outOampduPkts, + RTL8365MB_MIB_inOampduPkts, + RTL8365MB_MIB_inIgmpJoinsSuccess, + RTL8365MB_MIB_inIgmpJoinsFail, + RTL8365MB_MIB_inMldJoinsSuccess, + RTL8365MB_MIB_inMldJoinsFail, + RTL8365MB_MIB_inReportSuppressionDrop, + RTL8365MB_MIB_inLeaveSuppressionDrop, + RTL8365MB_MIB_outIgmpReports, + RTL8365MB_MIB_outIgmpLeaves, + RTL8365MB_MIB_outIgmpGeneralQuery, + RTL8365MB_MIB_outIgmpSpecificQuery, + RTL8365MB_MIB_outMldReports, + RTL8365MB_MIB_outMldLeaves, + RTL8365MB_MIB_outMldGeneralQuery, + RTL8365MB_MIB_outMldSpecificQuery, + RTL8365MB_MIB_inKnownMulticastPkts, + RTL8365MB_MIB_END, +}; + +struct rtl8365mb_mib_counter { + u32 offset; + u32 length; + const char *name; +}; + +#define RTL8365MB_MAKE_MIB_COUNTER(_offset, _length, _name) \ + [RTL8365MB_MIB_ ## _name] = { _offset, _length, #_name } + +static struct rtl8365mb_mib_counter rtl8365mb_mib_counters[] = { + RTL8365MB_MAKE_MIB_COUNTER(0, 4, ifInOctets), + RTL8365MB_MAKE_MIB_COUNTER(4, 2, dot3StatsFCSErrors), + RTL8365MB_MAKE_MIB_COUNTER(6, 2, dot3StatsSymbolErrors), + RTL8365MB_MAKE_MIB_COUNTER(8, 2, dot3InPauseFrames), + RTL8365MB_MAKE_MIB_COUNTER(10, 2, dot3ControlInUnknownOpcodes), + RTL8365MB_MAKE_MIB_COUNTER(12, 2, etherStatsFragments), + RTL8365MB_MAKE_MIB_COUNTER(14, 2, etherStatsJabbers), + RTL8365MB_MAKE_MIB_COUNTER(16, 2, ifInUcastPkts), + RTL8365MB_MAKE_MIB_COUNTER(18, 2, etherStatsDropEvents), + RTL8365MB_MAKE_MIB_COUNTER(20, 2, ifInMulticastPkts), + RTL8365MB_MAKE_MIB_COUNTER(22, 2, ifInBroadcastPkts), + RTL8365MB_MAKE_MIB_COUNTER(24, 2, inMldChecksumError), + RTL8365MB_MAKE_MIB_COUNTER(26, 2, inIgmpChecksumError), + RTL8365MB_MAKE_MIB_COUNTER(28, 2, inMldSpecificQuery), + RTL8365MB_MAKE_MIB_COUNTER(30, 2, inMldGeneralQuery), + RTL8365MB_MAKE_MIB_COUNTER(32, 2, inIgmpSpecificQuery), + RTL8365MB_MAKE_MIB_COUNTER(34, 2, inIgmpGeneralQuery), + RTL8365MB_MAKE_MIB_COUNTER(36, 2, inMldLeaves), + RTL8365MB_MAKE_MIB_COUNTER(38, 2, inIgmpLeaves), + RTL8365MB_MAKE_MIB_COUNTER(40, 4, etherStatsOctets), + RTL8365MB_MAKE_MIB_COUNTER(44, 2, etherStatsUnderSizePkts), + RTL8365MB_MAKE_MIB_COUNTER(46, 2, etherOversizeStats), + RTL8365MB_MAKE_MIB_COUNTER(48, 2, etherStatsPkts64Octets), + RTL8365MB_MAKE_MIB_COUNTER(50, 2, etherStatsPkts65to127Octets), + RTL8365MB_MAKE_MIB_COUNTER(52, 2, etherStatsPkts128to255Octets), + RTL8365MB_MAKE_MIB_COUNTER(54, 2, etherStatsPkts256to511Octets), + RTL8365MB_MAKE_MIB_COUNTER(56, 2, etherStatsPkts512to1023Octets), + RTL8365MB_MAKE_MIB_COUNTER(58, 2, etherStatsPkts1024to1518Octets), + RTL8365MB_MAKE_MIB_COUNTER(60, 4, ifOutOctets), + RTL8365MB_MAKE_MIB_COUNTER(64, 2, dot3StatsSingleCollisionFrames), + RTL8365MB_MAKE_MIB_COUNTER(66, 2, dot3StatsMultipleCollisionFrames), + RTL8365MB_MAKE_MIB_COUNTER(68, 2, dot3StatsDeferredTransmissions), + RTL8365MB_MAKE_MIB_COUNTER(70, 2, dot3StatsLateCollisions), + RTL8365MB_MAKE_MIB_COUNTER(72, 2, etherStatsCollisions), + RTL8365MB_MAKE_MIB_COUNTER(74, 2, dot3StatsExcessiveCollisions), + RTL8365MB_MAKE_MIB_COUNTER(76, 2, dot3OutPauseFrames), + RTL8365MB_MAKE_MIB_COUNTER(78, 2, ifOutDiscards), + RTL8365MB_MAKE_MIB_COUNTER(80, 2, dot1dTpPortInDiscards), + RTL8365MB_MAKE_MIB_COUNTER(82, 2, ifOutUcastPkts), + RTL8365MB_MAKE_MIB_COUNTER(84, 2, ifOutMulticastPkts), + RTL8365MB_MAKE_MIB_COUNTER(86, 2, ifOutBroadcastPkts), + RTL8365MB_MAKE_MIB_COUNTER(88, 2, outOampduPkts), + RTL8365MB_MAKE_MIB_COUNTER(90, 2, inOampduPkts), + RTL8365MB_MAKE_MIB_COUNTER(92, 4, inIgmpJoinsSuccess), + RTL8365MB_MAKE_MIB_COUNTER(96, 2, inIgmpJoinsFail), + RTL8365MB_MAKE_MIB_COUNTER(98, 2, inMldJoinsSuccess), + RTL8365MB_MAKE_MIB_COUNTER(100, 2, inMldJoinsFail), + RTL8365MB_MAKE_MIB_COUNTER(102, 2, inReportSuppressionDrop), + RTL8365MB_MAKE_MIB_COUNTER(104, 2, inLeaveSuppressionDrop), + RTL8365MB_MAKE_MIB_COUNTER(106, 2, outIgmpReports), + RTL8365MB_MAKE_MIB_COUNTER(108, 2, outIgmpLeaves), + RTL8365MB_MAKE_MIB_COUNTER(110, 2, outIgmpGeneralQuery), + RTL8365MB_MAKE_MIB_COUNTER(112, 2, outIgmpSpecificQuery), + RTL8365MB_MAKE_MIB_COUNTER(114, 2, outMldReports), + RTL8365MB_MAKE_MIB_COUNTER(116, 2, outMldLeaves), + RTL8365MB_MAKE_MIB_COUNTER(118, 2, outMldGeneralQuery), + RTL8365MB_MAKE_MIB_COUNTER(120, 2, outMldSpecificQuery), + RTL8365MB_MAKE_MIB_COUNTER(122, 2, inKnownMulticastPkts), +}; + +static_assert(ARRAY_SIZE(rtl8365mb_mib_counters) == RTL8365MB_MIB_END); + +struct rtl8365mb_jam_tbl_entry { + u16 reg; + u16 val; +}; + +/* Lifted from the vendor driver sources */ +static const struct rtl8365mb_jam_tbl_entry rtl8365mb_init_jam_8365mb_vc[] = { + { 0x13EB, 0x15BB }, { 0x1303, 0x06D6 }, { 0x1304, 0x0700 }, + { 0x13E2, 0x003F }, { 0x13F9, 0x0090 }, { 0x121E, 0x03CA }, + { 0x1233, 0x0352 }, { 0x1237, 0x00A0 }, { 0x123A, 0x0030 }, + { 0x1239, 0x0084 }, { 0x0301, 0x1000 }, { 0x1349, 0x001F }, + { 0x18E0, 0x4004 }, { 0x122B, 0x241C }, { 0x1305, 0xC000 }, + { 0x13F0, 0x0000 }, +}; + +static const struct rtl8365mb_jam_tbl_entry rtl8365mb_init_jam_common[] = { + { 0x1200, 0x7FCB }, { 0x0884, 0x0003 }, { 0x06EB, 0x0001 }, + { 0x03Fa, 0x0007 }, { 0x08C8, 0x00C0 }, { 0x0A30, 0x020E }, + { 0x0800, 0x0000 }, { 0x0802, 0x0000 }, { 0x09DA, 0x0013 }, + { 0x1D32, 0x0002 }, +}; + +enum rtl8365mb_stp_state { + RTL8365MB_STP_STATE_DISABLED = 0, + RTL8365MB_STP_STATE_BLOCKING = 1, + RTL8365MB_STP_STATE_LEARNING = 2, + RTL8365MB_STP_STATE_FORWARDING = 3, +}; + +enum rtl8365mb_cpu_insert { + RTL8365MB_CPU_INSERT_TO_ALL = 0, + RTL8365MB_CPU_INSERT_TO_TRAPPING = 1, + RTL8365MB_CPU_INSERT_TO_NONE = 2, +}; + +enum rtl8365mb_cpu_position { + RTL8365MB_CPU_POS_AFTER_SA = 0, + RTL8365MB_CPU_POS_BEFORE_CRC = 1, +}; + +enum rtl8365mb_cpu_format { + RTL8365MB_CPU_FORMAT_8BYTES = 0, + RTL8365MB_CPU_FORMAT_4BYTES = 1, +}; + +enum rtl8365mb_cpu_rxlen { + RTL8365MB_CPU_RXLEN_72BYTES = 0, + RTL8365MB_CPU_RXLEN_64BYTES = 1, +}; + +/** + * struct rtl8365mb_cpu - CPU port configuration + * @enable: enable/disable hardware insertion of CPU tag in switch->CPU frames + * @mask: port mask of ports that parse should parse CPU tags + * @trap_port: forward trapped frames to this port + * @insert: CPU tag insertion mode in switch->CPU frames + * @position: position of CPU tag in frame + * @rx_length: minimum CPU RX length + * @format: CPU tag format + * + * Represents the CPU tagging and CPU port configuration of the switch. These + * settings are configurable at runtime. + */ +struct rtl8365mb_cpu { + bool enable; + u32 mask; + u32 trap_port; + enum rtl8365mb_cpu_insert insert; + enum rtl8365mb_cpu_position position; + enum rtl8365mb_cpu_rxlen rx_length; + enum rtl8365mb_cpu_format format; +}; + +/** + * struct rtl8365mb_port - private per-port data + * @smi: pointer to parent realtek_smi data + * @index: DSA port index, same as dsa_port::index + * @stats: link statistics populated by rtl8365mb_stats_poll, ready for atomic + * access via rtl8365mb_get_stats64 + * @stats_lock: protect the stats structure during read/update + * @mib_work: delayed work for polling MIB counters + */ +struct rtl8365mb_port { + struct realtek_smi *smi; + unsigned int index; + struct rtnl_link_stats64 stats; + spinlock_t stats_lock; + struct delayed_work mib_work; +}; + +/** + * struct rtl8365mb - private chip-specific driver data + * @smi: pointer to parent realtek_smi data + * @irq: registered IRQ or zero + * @chip_id: chip identifier + * @chip_ver: chip silicon revision + * @port_mask: mask of all ports + * @learn_limit_max: maximum number of L2 addresses the chip can learn + * @cpu: CPU tagging and CPU port configuration for this chip + * @mib_lock: prevent concurrent reads of MIB counters + * @ports: per-port data + * @jam_table: chip-specific initialization jam table + * @jam_size: size of the chip's jam table + * + * Private data for this driver. + */ +struct rtl8365mb { + struct realtek_smi *smi; + int irq; + u32 chip_id; + u32 chip_ver; + u32 port_mask; + u32 learn_limit_max; + struct rtl8365mb_cpu cpu; + struct mutex mib_lock; + struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS]; + const struct rtl8365mb_jam_tbl_entry *jam_table; + size_t jam_size; +}; + +static int rtl8365mb_phy_poll_busy(struct realtek_smi *smi) +{ + u32 val; + + return regmap_read_poll_timeout(smi->map, + RTL8365MB_INDIRECT_ACCESS_STATUS_REG, + val, !val, 10, 100); +} + +static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, + u32 ocp_addr) +{ + u32 val; + int ret; + + /* Set OCP prefix */ + val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr); + ret = regmap_update_bits( + smi->map, RTL8365MB_GPHY_OCP_MSB_0_REG, + RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, + FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val)); + if (ret) + return ret; + + /* Set PHY register address */ + val = RTL8365MB_PHY_BASE; + val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK, phy); + val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_5_1_MASK, + ocp_addr >> 1); + val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK, + ocp_addr >> 6); + ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, + val); + if (ret) + return ret; + + return 0; +} + +static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy, + u32 ocp_addr, u16 *data) +{ + u32 val; + int ret; + + ret = rtl8365mb_phy_poll_busy(smi); + if (ret) + return ret; + + ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); + if (ret) + return ret; + + /* Execute read operation */ + val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, + RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | + FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, + RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ); + ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + if (ret) + return ret; + + ret = rtl8365mb_phy_poll_busy(smi); + if (ret) + return ret; + + /* Get PHY register data */ + ret = regmap_read(smi->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, + &val); + if (ret) + return ret; + + *data = val & 0xFFFF; + + return 0; +} + +static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy, + u32 ocp_addr, u16 data) +{ + u32 val; + int ret; + + ret = rtl8365mb_phy_poll_busy(smi); + if (ret) + return ret; + + ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); + if (ret) + return ret; + + /* Set PHY register data */ + ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, + data); + if (ret) + return ret; + + /* Execute write operation */ + val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, + RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | + FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, + RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE); + ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + if (ret) + return ret; + + ret = rtl8365mb_phy_poll_busy(smi); + if (ret) + return ret; + + return 0; +} + +static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum) +{ + u32 ocp_addr; + u16 val; + int ret; + + if (regnum > RTL8365MB_PHYREGMAX) + return -EINVAL; + + ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2; + + ret = rtl8365mb_phy_ocp_read(smi, phy, ocp_addr, &val); + if (ret) { + dev_err(smi->dev, + "failed to read PHY%d reg %02x @ %04x, ret %d\n", phy, + regnum, ocp_addr, ret); + return ret; + } + + dev_dbg(smi->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n", + phy, regnum, ocp_addr, val); + + return val; +} + +static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum, + u16 val) +{ + u32 ocp_addr; + int ret; + + if (regnum > RTL8365MB_PHYREGMAX) + return -EINVAL; + + ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2; + + ret = rtl8365mb_phy_ocp_write(smi, phy, ocp_addr, val); + if (ret) { + dev_err(smi->dev, + "failed to write PHY%d reg %02x @ %04x, ret %d\n", phy, + regnum, ocp_addr, ret); + return ret; + } + + dev_dbg(smi->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n", + phy, regnum, ocp_addr, val); + + return 0; +} + +static enum dsa_tag_protocol +rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port, + enum dsa_tag_protocol mp) +{ + return DSA_TAG_PROTO_RTL8_4; +} + +static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port, + phy_interface_t interface) +{ + struct device_node *dn; + struct dsa_port *dp; + int tx_delay = 0; + int rx_delay = 0; + int ext_port; + u32 val; + int ret; + + if (port == smi->cpu_port) { + ext_port = 1; + } else { + dev_err(smi->dev, "only one EXT port is currently supported\n"); + return -EINVAL; + } + + dp = dsa_to_port(smi->ds, port); + dn = dp->dn; + + /* Set the RGMII TX/RX delay + * + * The Realtek vendor driver indicates the following possible + * configuration settings: + * + * TX delay: + * 0 = no delay, 1 = 2 ns delay + * RX delay: + * 0 = no delay, 7 = maximum delay + * No units are specified, but there are a total of 8 steps. + * + * The vendor driver also states that this must be configured *before* + * forcing the external interface into a particular mode, which is done + * in the rtl8365mb_phylink_mac_link_{up,down} functions. + * + * Only configure an RGMII TX (resp. RX) delay if the + * tx-internal-delay-ps (resp. rx-internal-delay-ps) OF property is + * specified. We ignore the detail of the RGMII interface mode + * (RGMII_{RXID, TXID, etc.}), as this is considered to be a PHY-only + * property. + * + * For the RX delay, we assume that a register value of 4 corresponds to + * 2 ns. But this is just an educated guess, so ignore all other values + * to avoid too much confusion. + */ + if (!of_property_read_u32(dn, "tx-internal-delay-ps", &val)) { + val = val / 1000; /* convert to ns */ + + if (val == 0 || val == 2) + tx_delay = val / 2; + else + dev_warn(smi->dev, + "EXT port TX delay must be 0 or 2 ns\n"); + } + + if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) { + val = val / 1000; /* convert to ns */ + + if (val == 0 || val == 2) + rx_delay = val * 2; + else + dev_warn(smi->dev, + "EXT port RX delay must be 0 to 2 ns\n"); + } + + ret = regmap_update_bits( + smi->map, RTL8365MB_EXT_RGMXF_REG(ext_port), + RTL8365MB_EXT_RGMXF_TXDELAY_MASK | + RTL8365MB_EXT_RGMXF_RXDELAY_MASK, + FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) | + FIELD_PREP(RTL8365MB_EXT_RGMXF_RXDELAY_MASK, rx_delay)); + if (ret) + return ret; + + ret = regmap_update_bits( + smi->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_port), + RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_port), + RTL8365MB_EXT_PORT_MODE_RGMII + << RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET( + ext_port)); + if (ret) + return ret; + + return 0; +} + +static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port, + bool link, int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + u32 r_tx_pause; + u32 r_rx_pause; + u32 r_duplex; + u32 r_speed; + u32 r_link; + int ext_port; + int val; + int ret; + + if (port == smi->cpu_port) { + ext_port = 1; + } else { + dev_err(smi->dev, "only one EXT port is currently supported\n"); + return -EINVAL; + } + + if (link) { + /* Force the link up with the desired configuration */ + r_link = 1; + r_rx_pause = rx_pause ? 1 : 0; + r_tx_pause = tx_pause ? 1 : 0; + + if (speed == SPEED_1000) { + r_speed = RTL8365MB_PORT_SPEED_1000M; + } else if (speed == SPEED_100) { + r_speed = RTL8365MB_PORT_SPEED_100M; + } else if (speed == SPEED_10) { + r_speed = RTL8365MB_PORT_SPEED_10M; + } else { + dev_err(smi->dev, "unsupported port speed %s\n", + phy_speed_to_str(speed)); + return -EINVAL; + } + + if (duplex == DUPLEX_FULL) { + r_duplex = 1; + } else if (duplex == DUPLEX_HALF) { + r_duplex = 0; + } else { + dev_err(smi->dev, "unsupported duplex %s\n", + phy_duplex_to_str(duplex)); + return -EINVAL; + } + } else { + /* Force the link down and reset any programmed configuration */ + r_link = 0; + r_tx_pause = 0; + r_rx_pause = 0; + r_speed = 0; + r_duplex = 0; + } + + val = FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_EN_MASK, 1) | + FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_TXPAUSE_MASK, + r_tx_pause) | + FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_RXPAUSE_MASK, + r_rx_pause) | + FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_LINK_MASK, r_link) | + FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_DUPLEX_MASK, + r_duplex) | + FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed); + ret = regmap_write(smi->map, + RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_port), + val); + if (ret) + return ret; + + return 0; +} + +static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port, + phy_interface_t interface) +{ + if (dsa_is_user_port(ds, port) && + (interface == PHY_INTERFACE_MODE_NA || + interface == PHY_INTERFACE_MODE_INTERNAL)) + /* Internal PHY */ + return true; + else if (dsa_is_cpu_port(ds, port) && + phy_interface_mode_is_rgmii(interface)) + /* Extension MAC */ + return true; + + return false; +} + +static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + struct realtek_smi *smi = ds->priv; + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 }; + + /* include/linux/phylink.h says: + * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink + * expects the MAC driver to return all supported link modes. + */ + if (state->interface != PHY_INTERFACE_MODE_NA && + !rtl8365mb_phy_mode_supported(ds, port, state->interface)) { + dev_err(smi->dev, "phy mode %s is unsupported on port %d\n", + phy_modes(state->interface), port); + linkmode_zero(supported); + return; + } + + phylink_set_port_modes(mask); + + phylink_set(mask, Autoneg); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseT_Full); + + linkmode_and(supported, supported, mask); + linkmode_and(state->advertising, state->advertising, mask); +} + +static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state) +{ + struct realtek_smi *smi = ds->priv; + int ret; + + if (!rtl8365mb_phy_mode_supported(ds, port, state->interface)) { + dev_err(smi->dev, "phy mode %s is unsupported on port %d\n", + phy_modes(state->interface), port); + return; + } + + if (mode != MLO_AN_PHY && mode != MLO_AN_FIXED) { + dev_err(smi->dev, + "port %d supports only conventional PHY or fixed-link\n", + port); + return; + } + + if (phy_interface_mode_is_rgmii(state->interface)) { + ret = rtl8365mb_ext_config_rgmii(smi, port, state->interface); + if (ret) + dev_err(smi->dev, + "failed to configure RGMII mode on port %d: %d\n", + port, ret); + return; + } + + /* TODO: Implement MII and RMII modes, which the RTL8365MB-VC also + * supports + */ +} + +static void rtl8365mb_phylink_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb_port *p; + struct rtl8365mb *mb; + int ret; + + mb = smi->chip_data; + p = &mb->ports[port]; + cancel_delayed_work_sync(&p->mib_work); + + if (phy_interface_mode_is_rgmii(interface)) { + ret = rtl8365mb_ext_config_forcemode(smi, port, false, 0, 0, + false, false); + if (ret) + dev_err(smi->dev, + "failed to reset forced mode on port %d: %d\n", + port, ret); + + return; + } +} + +static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev, int speed, + int duplex, bool tx_pause, + bool rx_pause) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb_port *p; + struct rtl8365mb *mb; + int ret; + + mb = smi->chip_data; + p = &mb->ports[port]; + schedule_delayed_work(&p->mib_work, 0); + + if (phy_interface_mode_is_rgmii(interface)) { + ret = rtl8365mb_ext_config_forcemode(smi, port, true, speed, + duplex, tx_pause, + rx_pause); + if (ret) + dev_err(smi->dev, + "failed to force mode on port %d: %d\n", port, + ret); + + return; + } +} + +static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) +{ + struct realtek_smi *smi = ds->priv; + enum rtl8365mb_stp_state val; + int msti = 0; + + switch (state) { + case BR_STATE_DISABLED: + val = RTL8365MB_STP_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + val = RTL8365MB_STP_STATE_BLOCKING; + break; + case BR_STATE_LEARNING: + val = RTL8365MB_STP_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + val = RTL8365MB_STP_STATE_FORWARDING; + break; + default: + dev_err(smi->dev, "invalid STP state: %u\n", state); + return; + } + + regmap_update_bits(smi->map, RTL8365MB_MSTI_CTRL_REG(msti, port), + RTL8365MB_MSTI_CTRL_PORT_STATE_MASK(port), + val << RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET(port)); +} + +static int rtl8365mb_port_set_learning(struct realtek_smi *smi, int port, + bool enable) +{ + struct rtl8365mb *mb = smi->chip_data; + + /* Enable/disable learning by limiting the number of L2 addresses the + * port can learn. Realtek documentation states that a limit of zero + * disables learning. When enabling learning, set it to the chip's + * maximum. + */ + return regmap_write(smi->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port), + enable ? mb->learn_limit_max : 0); +} + +static int rtl8365mb_port_set_isolation(struct realtek_smi *smi, int port, + u32 mask) +{ + return regmap_write(smi->map, RTL8365MB_PORT_ISOLATION_REG(port), mask); +} + +static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port, + u32 offset, u32 length, u64 *mibvalue) +{ + u64 tmpvalue = 0; + u32 val; + int ret; + int i; + + /* The MIB address is an SRAM address. We request a particular address + * and then poll the control register before reading the value from some + * counter registers. + */ + ret = regmap_write(smi->map, RTL8365MB_MIB_ADDRESS_REG, + RTL8365MB_MIB_ADDRESS(port, offset)); + if (ret) + return ret; + + /* Poll for completion */ + ret = regmap_read_poll_timeout(smi->map, RTL8365MB_MIB_CTRL0_REG, val, + !(val & RTL8365MB_MIB_CTRL0_BUSY_MASK), + 10, 100); + if (ret) + return ret; + + /* Presumably this indicates a MIB counter read failure */ + if (val & RTL8365MB_MIB_CTRL0_RESET_MASK) + return -EIO; + + /* There are four MIB counter registers each holding a 16 bit word of a + * MIB counter. Depending on the offset, we should read from the upper + * two or lower two registers. In case the MIB counter is 4 words, we + * read from all four registers. + */ + if (length == 4) + offset = 3; + else + offset = (offset + 1) % 4; + + /* Read the MIB counter 16 bits at a time */ + for (i = 0; i < length; i++) { + ret = regmap_read(smi->map, + RTL8365MB_MIB_COUNTER_REG(offset - i), &val); + if (ret) + return ret; + + tmpvalue = ((tmpvalue) << 16) | (val & 0xFFFF); + } + + /* Only commit the result if no error occurred */ + *mibvalue = tmpvalue; + + return 0; +} + +static void rtl8365mb_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb *mb; + int ret; + int i; + + mb = smi->chip_data; + + mutex_lock(&mb->mib_lock); + for (i = 0; i < RTL8365MB_MIB_END; i++) { + struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i]; + + ret = rtl8365mb_mib_counter_read(smi, port, mib->offset, + mib->length, &data[i]); + if (ret) { + dev_err(smi->dev, + "failed to read port %d counters: %d\n", port, + ret); + break; + } + } + mutex_unlock(&mb->mib_lock); +} + +static void rtl8365mb_get_strings(struct dsa_switch *ds, int port, u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < RTL8365MB_MIB_END; i++) { + struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i]; + + strncpy(data + i * ETH_GSTRING_LEN, mib->name, ETH_GSTRING_LEN); + } +} + +static int rtl8365mb_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return RTL8365MB_MIB_END; +} + +static void rtl8365mb_get_phy_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb_mib_counter *mib; + struct rtl8365mb *mb; + + mb = smi->chip_data; + mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3StatsSymbolErrors]; + + mutex_lock(&mb->mib_lock); + rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length, + &phy_stats->SymbolErrorDuringCarrier); + mutex_unlock(&mb->mib_lock); +} + +static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats) +{ + u64 cnt[RTL8365MB_MIB_END] = { + [RTL8365MB_MIB_ifOutOctets] = 1, + [RTL8365MB_MIB_ifOutUcastPkts] = 1, + [RTL8365MB_MIB_ifOutMulticastPkts] = 1, + [RTL8365MB_MIB_ifOutBroadcastPkts] = 1, + [RTL8365MB_MIB_dot3OutPauseFrames] = 1, + [RTL8365MB_MIB_ifOutDiscards] = 1, + [RTL8365MB_MIB_ifInOctets] = 1, + [RTL8365MB_MIB_ifInUcastPkts] = 1, + [RTL8365MB_MIB_ifInMulticastPkts] = 1, + [RTL8365MB_MIB_ifInBroadcastPkts] = 1, + [RTL8365MB_MIB_dot3InPauseFrames] = 1, + [RTL8365MB_MIB_dot3StatsSingleCollisionFrames] = 1, + [RTL8365MB_MIB_dot3StatsMultipleCollisionFrames] = 1, + [RTL8365MB_MIB_dot3StatsFCSErrors] = 1, + [RTL8365MB_MIB_dot3StatsDeferredTransmissions] = 1, + [RTL8365MB_MIB_dot3StatsLateCollisions] = 1, + [RTL8365MB_MIB_dot3StatsExcessiveCollisions] = 1, + + }; + struct realtek_smi *smi = ds->priv; + struct rtl8365mb *mb; + int ret; + int i; + + mb = smi->chip_data; + + mutex_lock(&mb->mib_lock); + for (i = 0; i < RTL8365MB_MIB_END; i++) { + struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i]; + + /* Only fetch required MIB counters (marked = 1 above) */ + if (!cnt[i]) + continue; + + ret = rtl8365mb_mib_counter_read(smi, port, mib->offset, + mib->length, &cnt[i]); + if (ret) + break; + } + mutex_unlock(&mb->mib_lock); + + /* The RTL8365MB-VC exposes MIB objects, which we have to translate into + * IEEE 802.3 Managed Objects. This is not always completely faithful, + * but we try out best. See RFC 3635 for a detailed treatment of the + * subject. + */ + + mac_stats->FramesTransmittedOK = cnt[RTL8365MB_MIB_ifOutUcastPkts] + + cnt[RTL8365MB_MIB_ifOutMulticastPkts] + + cnt[RTL8365MB_MIB_ifOutBroadcastPkts] + + cnt[RTL8365MB_MIB_dot3OutPauseFrames] - + cnt[RTL8365MB_MIB_ifOutDiscards]; + mac_stats->SingleCollisionFrames = + cnt[RTL8365MB_MIB_dot3StatsSingleCollisionFrames]; + mac_stats->MultipleCollisionFrames = + cnt[RTL8365MB_MIB_dot3StatsMultipleCollisionFrames]; + mac_stats->FramesReceivedOK = cnt[RTL8365MB_MIB_ifInUcastPkts] + + cnt[RTL8365MB_MIB_ifInMulticastPkts] + + cnt[RTL8365MB_MIB_ifInBroadcastPkts] + + cnt[RTL8365MB_MIB_dot3InPauseFrames]; + mac_stats->FrameCheckSequenceErrors = + cnt[RTL8365MB_MIB_dot3StatsFCSErrors]; + mac_stats->OctetsTransmittedOK = cnt[RTL8365MB_MIB_ifOutOctets] - + 18 * mac_stats->FramesTransmittedOK; + mac_stats->FramesWithDeferredXmissions = + cnt[RTL8365MB_MIB_dot3StatsDeferredTransmissions]; + mac_stats->LateCollisions = cnt[RTL8365MB_MIB_dot3StatsLateCollisions]; + mac_stats->FramesAbortedDueToXSColls = + cnt[RTL8365MB_MIB_dot3StatsExcessiveCollisions]; + mac_stats->OctetsReceivedOK = cnt[RTL8365MB_MIB_ifInOctets] - + 18 * mac_stats->FramesReceivedOK; + mac_stats->MulticastFramesXmittedOK = + cnt[RTL8365MB_MIB_ifOutMulticastPkts]; + mac_stats->BroadcastFramesXmittedOK = + cnt[RTL8365MB_MIB_ifOutBroadcastPkts]; + mac_stats->MulticastFramesReceivedOK = + cnt[RTL8365MB_MIB_ifInMulticastPkts]; + mac_stats->BroadcastFramesReceivedOK = + cnt[RTL8365MB_MIB_ifInBroadcastPkts]; +} + +static void rtl8365mb_get_ctrl_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb_mib_counter *mib; + struct rtl8365mb *mb; + + mb = smi->chip_data; + mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3ControlInUnknownOpcodes]; + + mutex_lock(&mb->mib_lock); + rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length, + &ctrl_stats->UnsupportedOpcodesReceived); + mutex_unlock(&mb->mib_lock); +} + +static void rtl8365mb_stats_update(struct realtek_smi *smi, int port) +{ + u64 cnt[RTL8365MB_MIB_END] = { + [RTL8365MB_MIB_ifOutOctets] = 1, + [RTL8365MB_MIB_ifOutUcastPkts] = 1, + [RTL8365MB_MIB_ifOutMulticastPkts] = 1, + [RTL8365MB_MIB_ifOutBroadcastPkts] = 1, + [RTL8365MB_MIB_ifOutDiscards] = 1, + [RTL8365MB_MIB_ifInOctets] = 1, + [RTL8365MB_MIB_ifInUcastPkts] = 1, + [RTL8365MB_MIB_ifInMulticastPkts] = 1, + [RTL8365MB_MIB_ifInBroadcastPkts] = 1, + [RTL8365MB_MIB_etherStatsDropEvents] = 1, + [RTL8365MB_MIB_etherStatsCollisions] = 1, + [RTL8365MB_MIB_etherStatsFragments] = 1, + [RTL8365MB_MIB_etherStatsJabbers] = 1, + [RTL8365MB_MIB_dot3StatsFCSErrors] = 1, + [RTL8365MB_MIB_dot3StatsLateCollisions] = 1, + }; + struct rtl8365mb *mb = smi->chip_data; + struct rtnl_link_stats64 *stats; + int ret; + int i; + + stats = &mb->ports[port].stats; + + mutex_lock(&mb->mib_lock); + for (i = 0; i < RTL8365MB_MIB_END; i++) { + struct rtl8365mb_mib_counter *c = &rtl8365mb_mib_counters[i]; + + /* Only fetch required MIB counters (marked = 1 above) */ + if (!cnt[i]) + continue; + + ret = rtl8365mb_mib_counter_read(smi, port, c->offset, + c->length, &cnt[i]); + if (ret) + break; + } + mutex_unlock(&mb->mib_lock); + + /* Don't update statistics if there was an error reading the counters */ + if (ret) + return; + + spin_lock(&mb->ports[port].stats_lock); + + stats->rx_packets = cnt[RTL8365MB_MIB_ifInUcastPkts] + + cnt[RTL8365MB_MIB_ifInMulticastPkts] + + cnt[RTL8365MB_MIB_ifInBroadcastPkts] - + cnt[RTL8365MB_MIB_ifOutDiscards]; + + stats->tx_packets = cnt[RTL8365MB_MIB_ifOutUcastPkts] + + cnt[RTL8365MB_MIB_ifOutMulticastPkts] + + cnt[RTL8365MB_MIB_ifOutBroadcastPkts]; + + /* if{In,Out}Octets includes FCS - remove it */ + stats->rx_bytes = cnt[RTL8365MB_MIB_ifInOctets] - 4 * stats->rx_packets; + stats->tx_bytes = + cnt[RTL8365MB_MIB_ifOutOctets] - 4 * stats->tx_packets; + + stats->rx_dropped = cnt[RTL8365MB_MIB_etherStatsDropEvents]; + stats->tx_dropped = cnt[RTL8365MB_MIB_ifOutDiscards]; + + stats->multicast = cnt[RTL8365MB_MIB_ifInMulticastPkts]; + stats->collisions = cnt[RTL8365MB_MIB_etherStatsCollisions]; + + stats->rx_length_errors = cnt[RTL8365MB_MIB_etherStatsFragments] + + cnt[RTL8365MB_MIB_etherStatsJabbers]; + stats->rx_crc_errors = cnt[RTL8365MB_MIB_dot3StatsFCSErrors]; + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors; + + stats->tx_aborted_errors = cnt[RTL8365MB_MIB_ifOutDiscards]; + stats->tx_window_errors = cnt[RTL8365MB_MIB_dot3StatsLateCollisions]; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_window_errors; + + spin_unlock(&mb->ports[port].stats_lock); +} + +static void rtl8365mb_stats_poll(struct work_struct *work) +{ + struct rtl8365mb_port *p = container_of(to_delayed_work(work), + struct rtl8365mb_port, + mib_work); + struct realtek_smi *smi = p->smi; + + rtl8365mb_stats_update(smi, p->index); + + schedule_delayed_work(&p->mib_work, RTL8365MB_STATS_INTERVAL_JIFFIES); +} + +static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb_port *p; + struct rtl8365mb *mb; + + mb = smi->chip_data; + p = &mb->ports[port]; + + spin_lock(&p->stats_lock); + memcpy(s, &p->stats, sizeof(*s)); + spin_unlock(&p->stats_lock); +} + +static void rtl8365mb_stats_setup(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + int i; + + /* Per-chip global mutex to protect MIB counter access, since doing + * so requires accessing a series of registers in a particular order. + */ + mutex_init(&mb->mib_lock); + + for (i = 0; i < smi->num_ports; i++) { + struct rtl8365mb_port *p = &mb->ports[i]; + + if (dsa_is_unused_port(smi->ds, i)) + continue; + + /* Per-port spinlock to protect the stats64 data */ + spin_lock_init(&p->stats_lock); + + /* This work polls the MIB counters and keeps the stats64 data + * up-to-date. + */ + INIT_DELAYED_WORK(&p->mib_work, rtl8365mb_stats_poll); + } +} + +static void rtl8365mb_stats_teardown(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + int i; + + for (i = 0; i < smi->num_ports; i++) { + struct rtl8365mb_port *p = &mb->ports[i]; + + if (dsa_is_unused_port(smi->ds, i)) + continue; + + cancel_delayed_work_sync(&p->mib_work); + } +} + +static int rtl8365mb_get_and_clear_status_reg(struct realtek_smi *smi, u32 reg, + u32 *val) +{ + int ret; + + ret = regmap_read(smi->map, reg, val); + if (ret) + return ret; + + return regmap_write(smi->map, reg, *val); +} + +static irqreturn_t rtl8365mb_irq(int irq, void *data) +{ + struct realtek_smi *smi = data; + unsigned long line_changes = 0; + struct rtl8365mb *mb; + u32 stat; + int line; + int ret; + + mb = smi->chip_data; + + ret = rtl8365mb_get_and_clear_status_reg(smi, RTL8365MB_INTR_STATUS_REG, + &stat); + if (ret) + goto out_error; + + if (stat & RTL8365MB_INTR_LINK_CHANGE_MASK) { + u32 linkdown_ind; + u32 linkup_ind; + u32 val; + + ret = rtl8365mb_get_and_clear_status_reg( + smi, RTL8365MB_PORT_LINKUP_IND_REG, &val); + if (ret) + goto out_error; + + linkup_ind = FIELD_GET(RTL8365MB_PORT_LINKUP_IND_MASK, val); + + ret = rtl8365mb_get_and_clear_status_reg( + smi, RTL8365MB_PORT_LINKDOWN_IND_REG, &val); + if (ret) + goto out_error; + + linkdown_ind = FIELD_GET(RTL8365MB_PORT_LINKDOWN_IND_MASK, val); + + line_changes = (linkup_ind | linkdown_ind) & mb->port_mask; + } + + if (!line_changes) + goto out_none; + + for_each_set_bit(line, &line_changes, smi->num_ports) { + int child_irq = irq_find_mapping(smi->irqdomain, line); + + handle_nested_irq(child_irq); + } + + return IRQ_HANDLED; + +out_error: + dev_err(smi->dev, "failed to read interrupt status: %d\n", ret); + +out_none: + return IRQ_NONE; +} + +static struct irq_chip rtl8365mb_irq_chip = { + .name = "rtl8365mb", + /* The hardware doesn't support masking IRQs on a per-port basis */ +}; + +static int rtl8365mb_irq_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, domain->host_data); + irq_set_chip_and_handler(irq, &rtl8365mb_irq_chip, handle_simple_irq); + irq_set_nested_thread(irq, 1); + irq_set_noprobe(irq); + + return 0; +} + +static void rtl8365mb_irq_unmap(struct irq_domain *d, unsigned int irq) +{ + irq_set_nested_thread(irq, 0); + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); +} + +static const struct irq_domain_ops rtl8365mb_irqdomain_ops = { + .map = rtl8365mb_irq_map, + .unmap = rtl8365mb_irq_unmap, + .xlate = irq_domain_xlate_onecell, +}; + +static int rtl8365mb_set_irq_enable(struct realtek_smi *smi, bool enable) +{ + return regmap_update_bits(smi->map, RTL8365MB_INTR_CTRL_REG, + RTL8365MB_INTR_LINK_CHANGE_MASK, + FIELD_PREP(RTL8365MB_INTR_LINK_CHANGE_MASK, + enable ? 1 : 0)); +} + +static int rtl8365mb_irq_enable(struct realtek_smi *smi) +{ + return rtl8365mb_set_irq_enable(smi, true); +} + +static int rtl8365mb_irq_disable(struct realtek_smi *smi) +{ + return rtl8365mb_set_irq_enable(smi, false); +} + +static int rtl8365mb_irq_setup(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + struct device_node *intc; + u32 irq_trig; + int virq; + int irq; + u32 val; + int ret; + int i; + + intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller"); + if (!intc) { + dev_err(smi->dev, "missing child interrupt-controller node\n"); + return -EINVAL; + } + + /* rtl8365mb IRQs cascade off this one */ + irq = of_irq_get(intc, 0); + if (irq <= 0) { + if (irq != -EPROBE_DEFER) + dev_err(smi->dev, "failed to get parent irq: %d\n", + irq); + ret = irq ? irq : -EINVAL; + goto out_put_node; + } + + smi->irqdomain = irq_domain_add_linear(intc, smi->num_ports, + &rtl8365mb_irqdomain_ops, smi); + if (!smi->irqdomain) { + dev_err(smi->dev, "failed to add irq domain\n"); + ret = -ENOMEM; + goto out_put_node; + } + + for (i = 0; i < smi->num_ports; i++) { + virq = irq_create_mapping(smi->irqdomain, i); + if (!virq) { + dev_err(smi->dev, + "failed to create irq domain mapping\n"); + ret = -EINVAL; + goto out_remove_irqdomain; + } + + irq_set_parent(virq, irq); + } + + /* Configure chip interrupt signal polarity */ + irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq)); + switch (irq_trig) { + case IRQF_TRIGGER_RISING: + case IRQF_TRIGGER_HIGH: + val = RTL8365MB_INTR_POLARITY_HIGH; + break; + case IRQF_TRIGGER_FALLING: + case IRQF_TRIGGER_LOW: + val = RTL8365MB_INTR_POLARITY_LOW; + break; + default: + dev_err(smi->dev, "unsupported irq trigger type %u\n", + irq_trig); + ret = -EINVAL; + goto out_remove_irqdomain; + } + + ret = regmap_update_bits(smi->map, RTL8365MB_INTR_POLARITY_REG, + RTL8365MB_INTR_POLARITY_MASK, + FIELD_PREP(RTL8365MB_INTR_POLARITY_MASK, val)); + if (ret) + goto out_remove_irqdomain; + + /* Disable the interrupt in case the chip has it enabled on reset */ + ret = rtl8365mb_irq_disable(smi); + if (ret) + goto out_remove_irqdomain; + + /* Clear the interrupt status register */ + ret = regmap_write(smi->map, RTL8365MB_INTR_STATUS_REG, + RTL8365MB_INTR_ALL_MASK); + if (ret) + goto out_remove_irqdomain; + + ret = request_threaded_irq(irq, NULL, rtl8365mb_irq, IRQF_ONESHOT, + "rtl8365mb", smi); + if (ret) { + dev_err(smi->dev, "failed to request irq: %d\n", ret); + goto out_remove_irqdomain; + } + + /* Store the irq so that we know to free it during teardown */ + mb->irq = irq; + + ret = rtl8365mb_irq_enable(smi); + if (ret) + goto out_free_irq; + + of_node_put(intc); + + return 0; + +out_free_irq: + free_irq(mb->irq, smi); + mb->irq = 0; + +out_remove_irqdomain: + for (i = 0; i < smi->num_ports; i++) { + virq = irq_find_mapping(smi->irqdomain, i); + irq_dispose_mapping(virq); + } + + irq_domain_remove(smi->irqdomain); + smi->irqdomain = NULL; + +out_put_node: + of_node_put(intc); + + return ret; +} + +static void rtl8365mb_irq_teardown(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + int virq; + int i; + + if (mb->irq) { + free_irq(mb->irq, smi); + mb->irq = 0; + } + + if (smi->irqdomain) { + for (i = 0; i < smi->num_ports; i++) { + virq = irq_find_mapping(smi->irqdomain, i); + irq_dispose_mapping(virq); + } + + irq_domain_remove(smi->irqdomain); + smi->irqdomain = NULL; + } +} + +static int rtl8365mb_cpu_config(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + struct rtl8365mb_cpu *cpu = &mb->cpu; + u32 val; + int ret; + + ret = regmap_update_bits(smi->map, RTL8365MB_CPU_PORT_MASK_REG, + RTL8365MB_CPU_PORT_MASK_MASK, + FIELD_PREP(RTL8365MB_CPU_PORT_MASK_MASK, + cpu->mask)); + if (ret) + return ret; + + val = FIELD_PREP(RTL8365MB_CPU_CTRL_EN_MASK, cpu->enable ? 1 : 0) | + FIELD_PREP(RTL8365MB_CPU_CTRL_INSERTMODE_MASK, cpu->insert) | + FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_POSITION_MASK, cpu->position) | + FIELD_PREP(RTL8365MB_CPU_CTRL_RXBYTECOUNT_MASK, cpu->rx_length) | + FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_FORMAT_MASK, cpu->format) | + FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port) | + FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_EXT_MASK, + cpu->trap_port >> 3); + ret = regmap_write(smi->map, RTL8365MB_CPU_CTRL_REG, val); + if (ret) + return ret; + + return 0; +} + +static int rtl8365mb_switch_init(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + int ret; + int i; + + /* Do any chip-specific init jam before getting to the common stuff */ + if (mb->jam_table) { + for (i = 0; i < mb->jam_size; i++) { + ret = regmap_write(smi->map, mb->jam_table[i].reg, + mb->jam_table[i].val); + if (ret) + return ret; + } + } + + /* Common init jam */ + for (i = 0; i < ARRAY_SIZE(rtl8365mb_init_jam_common); i++) { + ret = regmap_write(smi->map, rtl8365mb_init_jam_common[i].reg, + rtl8365mb_init_jam_common[i].val); + if (ret) + return ret; + } + + return 0; +} + +static int rtl8365mb_reset_chip(struct realtek_smi *smi) +{ + u32 val; + + realtek_smi_write_reg_noack(smi, RTL8365MB_CHIP_RESET_REG, + FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK, + 1)); + + /* Realtek documentation says the chip needs 1 second to reset. Sleep + * for 100 ms before accessing any registers to prevent ACK timeouts. + */ + msleep(100); + return regmap_read_poll_timeout(smi->map, RTL8365MB_CHIP_RESET_REG, val, + !(val & RTL8365MB_CHIP_RESET_HW_MASK), + 20000, 1e6); +} + +static int rtl8365mb_setup(struct dsa_switch *ds) +{ + struct realtek_smi *smi = ds->priv; + struct rtl8365mb *mb; + int ret; + int i; + + mb = smi->chip_data; + + ret = rtl8365mb_reset_chip(smi); + if (ret) { + dev_err(smi->dev, "failed to reset chip: %d\n", ret); + goto out_error; + } + + /* Configure switch to vendor-defined initial state */ + ret = rtl8365mb_switch_init(smi); + if (ret) { + dev_err(smi->dev, "failed to initialize switch: %d\n", ret); + goto out_error; + } + + /* Set up cascading IRQs */ + ret = rtl8365mb_irq_setup(smi); + if (ret == -EPROBE_DEFER) + return ret; + else if (ret) + dev_info(smi->dev, "no interrupt support\n"); + + /* Configure CPU tagging */ + ret = rtl8365mb_cpu_config(smi); + if (ret) + goto out_teardown_irq; + + /* Configure ports */ + for (i = 0; i < smi->num_ports; i++) { + struct rtl8365mb_port *p = &mb->ports[i]; + + if (dsa_is_unused_port(smi->ds, i)) + continue; + + /* Set up per-port private data */ + p->smi = smi; + p->index = i; + + /* Forward only to the CPU */ + ret = rtl8365mb_port_set_isolation(smi, i, BIT(smi->cpu_port)); + if (ret) + goto out_teardown_irq; + + /* Disable learning */ + ret = rtl8365mb_port_set_learning(smi, i, false); + if (ret) + goto out_teardown_irq; + + /* Set the initial STP state of all ports to DISABLED, otherwise + * ports will still forward frames to the CPU despite being + * administratively down by default. + */ + rtl8365mb_port_stp_state_set(smi->ds, i, BR_STATE_DISABLED); + } + + /* Set maximum packet length to 1536 bytes */ + ret = regmap_update_bits(smi->map, RTL8365MB_CFG0_MAX_LEN_REG, + RTL8365MB_CFG0_MAX_LEN_MASK, + FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536)); + if (ret) + goto out_teardown_irq; + + ret = realtek_smi_setup_mdio(smi); + if (ret) { + dev_err(smi->dev, "could not set up MDIO bus\n"); + goto out_teardown_irq; + } + + /* Start statistics counter polling */ + rtl8365mb_stats_setup(smi); + + return 0; + +out_teardown_irq: + rtl8365mb_irq_teardown(smi); + +out_error: + return ret; +} + +static void rtl8365mb_teardown(struct dsa_switch *ds) +{ + struct realtek_smi *smi = ds->priv; + + rtl8365mb_stats_teardown(smi); + rtl8365mb_irq_teardown(smi); +} + +static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver) +{ + int ret; + + /* For some reason we have to write a magic value to an arbitrary + * register whenever accessing the chip ID/version registers. + */ + ret = regmap_write(map, RTL8365MB_MAGIC_REG, RTL8365MB_MAGIC_VALUE); + if (ret) + return ret; + + ret = regmap_read(map, RTL8365MB_CHIP_ID_REG, id); + if (ret) + return ret; + + ret = regmap_read(map, RTL8365MB_CHIP_VER_REG, ver); + if (ret) + return ret; + + /* Reset magic register */ + ret = regmap_write(map, RTL8365MB_MAGIC_REG, 0); + if (ret) + return ret; + + return 0; +} + +static int rtl8365mb_detect(struct realtek_smi *smi) +{ + struct rtl8365mb *mb = smi->chip_data; + u32 chip_id; + u32 chip_ver; + int ret; + + ret = rtl8365mb_get_chip_id_and_ver(smi->map, &chip_id, &chip_ver); + if (ret) { + dev_err(smi->dev, "failed to read chip id and version: %d\n", + ret); + return ret; + } + + switch (chip_id) { + case RTL8365MB_CHIP_ID_8365MB_VC: + dev_info(smi->dev, + "found an RTL8365MB-VC switch (ver=0x%04x)\n", + chip_ver); + + smi->cpu_port = RTL8365MB_CPU_PORT_NUM_8365MB_VC; + smi->num_ports = smi->cpu_port + 1; + + mb->smi = smi; + mb->chip_id = chip_id; + mb->chip_ver = chip_ver; + mb->port_mask = BIT(smi->num_ports) - 1; + mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC; + mb->jam_table = rtl8365mb_init_jam_8365mb_vc; + mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc); + + mb->cpu.enable = 1; + mb->cpu.mask = BIT(smi->cpu_port); + mb->cpu.trap_port = smi->cpu_port; + mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL; + mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA; + mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES; + mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES; + + break; + default: + dev_err(smi->dev, + "found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n", + chip_id, chip_ver); + return -ENODEV; + } + + return 0; +} + +static const struct dsa_switch_ops rtl8365mb_switch_ops = { + .get_tag_protocol = rtl8365mb_get_tag_protocol, + .setup = rtl8365mb_setup, + .teardown = rtl8365mb_teardown, + .phylink_validate = rtl8365mb_phylink_validate, + .phylink_mac_config = rtl8365mb_phylink_mac_config, + .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down, + .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up, + .port_stp_state_set = rtl8365mb_port_stp_state_set, + .get_strings = rtl8365mb_get_strings, + .get_ethtool_stats = rtl8365mb_get_ethtool_stats, + .get_sset_count = rtl8365mb_get_sset_count, + .get_eth_phy_stats = rtl8365mb_get_phy_stats, + .get_eth_mac_stats = rtl8365mb_get_mac_stats, + .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats, + .get_stats64 = rtl8365mb_get_stats64, +}; + +static const struct realtek_smi_ops rtl8365mb_smi_ops = { + .detect = rtl8365mb_detect, + .phy_read = rtl8365mb_phy_read, + .phy_write = rtl8365mb_phy_write, +}; + +const struct realtek_smi_variant rtl8365mb_variant = { + .ds_ops = &rtl8365mb_switch_ops, + .ops = &rtl8365mb_smi_ops, + .clk_delay = 10, + .cmd_read = 0xb9, + .cmd_write = 0xb8, + .chip_data_sz = sizeof(struct rtl8365mb), +}; +EXPORT_SYMBOL_GPL(rtl8365mb_variant); From 2ca2969aae1e110fe243a50d6d5809f6f4895092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Mon, 18 Oct 2021 11:38:02 +0200 Subject: [PATCH 300/440] net: phy: realtek: add support for RTL8365MB-VC internal PHYs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTL8365MB-VC ethernet switch controller has 4 internal PHYs for its user-facing ports. All that is needed is to let the PHY driver core pick up the IRQ made available by the switch driver. Signed-off-by: Alvin Šipraga Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 11be60333fa8..a5671ab896b3 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1023,6 +1023,14 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, + }, { + PHY_ID_MATCH_EXACT(0x001cc942), + .name = "RTL8365MB-VC Gigabit Ethernet", + /* Interrupt handling analogous to RTL8366RB */ + .config_intr = genphy_no_config_intr, + .handle_interrupt = genphy_handle_interrupt_no_ack, + .suspend = genphy_suspend, + .resume = genphy_resume, }, }; From 06dd34a628ae5b6a839b757e746de165d6789ca8 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 17 Oct 2021 16:56:46 +0200 Subject: [PATCH 301/440] net: dsa: qca8k: fix delay applied to wrong cpu in parse_port_config Fix delay settings applied to wrong cpu in parse_port_config. The delay values is set to the wrong index as the cpu_port_index is incremented too early. Start the cpu_port_index to -1 so the correct value is applied to address also the case with invalid phy mode and not available port. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index bf58d4d1ae9b..0992790da6b6 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -976,7 +976,7 @@ qca8k_setup_of_pws_reg(struct qca8k_priv *priv) static int qca8k_parse_port_config(struct qca8k_priv *priv) { - int port, cpu_port_index = 0, ret; + int port, cpu_port_index = -1, ret; struct device_node *port_dn; phy_interface_t mode; struct dsa_port *dp; From 041c61488236a5a84789083e3d9f0a51139b6edf Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Sun, 17 Oct 2021 19:16:57 +0200 Subject: [PATCH 302/440] sfc: Fix reading non-legacy supported link modes Everything except the first 32 bits was lost when the pause flags were added. This makes the 50000baseCR2 mode flag (bit 34) not appear. I have tested this with a 10G card (SFN5122F-R7) by modifying it to return a non-legacy link mode (10000baseCR). Signed-off-by: Erik Ekman Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ethtool_common.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index bf1443539a1a..bd552c7dffcb 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -563,20 +563,14 @@ int efx_ethtool_get_link_ksettings(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_link_state *link_state = &efx->link_state; - u32 supported; mutex_lock(&efx->mac_lock); efx_mcdi_phy_get_link_ksettings(efx, cmd); mutex_unlock(&efx->mac_lock); /* Both MACs support pause frames (bidirectional and respond-only) */ - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - - supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); if (LOOPBACK_INTERNAL(efx)) { cmd->base.speed = link_state->speed; From ac6b7e0d9679db908d92fb96605d1ce01e8173d4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 10:26:08 -0700 Subject: [PATCH 303/440] mlx5: prevent 64bit divide mlx5_tout_ms() returns a u64, we can't directly divide it. This is not a problem here, @timeout which is the value that actually matters here is already a ulong, so this implies storing return value of mlx5_tout_ms() on a ulong should be fine. This fixes: ERROR: modpost: "__udivdi3" [drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.ko] undefined! Fixes: 32def4120e48 ("net/mlx5: Read timeout values from DTOR") Link: https://lore.kernel.org/r/20211018172608.1069754-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index eaca79cc7b9d..0b0234f9d694 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -396,13 +396,14 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) { - unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)); + unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE); + unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout); struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; int err; if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { - mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n", - mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000); + mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n", + pci_sync_update_timeout / 1000); err = -ETIMEDOUT; goto out; } From 45eebd62999d37d13568723524b99d828e0ce22c Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 19 Aug 2021 08:47:40 +0000 Subject: [PATCH 304/440] iavf: Refactor iavf state machine tracking Replace state changes of iavf state machine with a method that also tracks the previous state the machine was on. This change is required for further work with refactoring init and watchdog state machines. Tracking of previous state would help us recover iavf after failure has occurred. Signed-off-by: Jakub Pawlak Signed-off-by: Jan Sokolowski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 10 +++++ drivers/net/ethernet/intel/iavf/iavf_main.c | 37 ++++++++++--------- .../net/ethernet/intel/iavf/iavf_virtchnl.c | 2 +- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 68c80f04113c..cd359cfe5cce 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -312,6 +312,7 @@ struct iavf_adapter { struct iavf_hw hw; /* defined in iavf_type.h */ enum iavf_state_t state; + enum iavf_state_t last_state; unsigned long crit_section; struct delayed_work watchdog_task; @@ -393,6 +394,15 @@ struct iavf_device { extern char iavf_driver_name[]; extern struct workqueue_struct *iavf_wq; +static inline void iavf_change_state(struct iavf_adapter *adapter, + enum iavf_state_t state) +{ + if (adapter->state != state) { + adapter->last_state = adapter->state; + adapter->state = state; + } +} + int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ca4712a73574..3df018c879e1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -960,7 +960,7 @@ static void iavf_configure(struct iavf_adapter *adapter) **/ static void iavf_up_complete(struct iavf_adapter *adapter) { - adapter->state = __IAVF_RUNNING; + iavf_change_state(adapter, __IAVF_RUNNING); clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state); iavf_napi_enable_all(adapter); @@ -1729,7 +1729,7 @@ static int iavf_startup(struct iavf_adapter *adapter) iavf_shutdown_adminq(hw); goto err; } - adapter->state = __IAVF_INIT_VERSION_CHECK; + iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK); err: return err; } @@ -1753,7 +1753,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) if (!iavf_asq_done(hw)) { dev_err(&pdev->dev, "Admin queue command never completed\n"); iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); goto err; } @@ -1776,8 +1776,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) err); goto err; } - adapter->state = __IAVF_INIT_GET_RESOURCES; - + iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES); err: return err; } @@ -1893,7 +1892,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); @@ -1941,7 +1940,7 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - adapter->state = __IAVF_COMM_FAILED; + iavf_change_state(adapter, __IAVF_COMM_FAILED); switch (adapter->state) { case __IAVF_COMM_FAILED: @@ -1952,7 +1951,7 @@ static void iavf_watchdog_task(struct work_struct *work) /* A chance for redemption! */ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; queue_delayed_work(iavf_wq, &adapter->init_task, 10); mutex_unlock(&adapter->crit_lock); @@ -1999,9 +1998,10 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; } - /* check for hw reset */ + /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -2081,7 +2081,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) adapter->netdev->flags &= ~IFF_UP; mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); } @@ -2191,7 +2191,7 @@ continue_reset: } iavf_irq_disable(adapter); - adapter->state = __IAVF_RESETTING; + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; /* free the Tx/Rx rings and descriptors, might be better to just @@ -2291,11 +2291,14 @@ continue_reset: iavf_configure(adapter); + /* iavf_up_complete() will switch device back + * to __IAVF_RUNNING + */ iavf_up_complete(adapter); iavf_irq_enable(adapter, true); } else { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } mutex_unlock(&adapter->client_lock); @@ -3297,7 +3300,7 @@ static int iavf_close(struct net_device *netdev) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE; iavf_down(adapter); - adapter->state = __IAVF_DOWN_PENDING; + iavf_change_state(adapter, __IAVF_DOWN_PENDING); iavf_free_traffic_irqs(adapter); mutex_unlock(&adapter->crit_lock); @@ -3679,7 +3682,7 @@ init_failed: "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); goto out; } @@ -3705,7 +3708,7 @@ static void iavf_shutdown(struct pci_dev *pdev) if (iavf_lock_timeout(&adapter->crit_lock, 5000)) dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; mutex_unlock(&adapter->crit_lock); @@ -3778,7 +3781,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); /* Call save state here because it relies on the adapter struct. */ pci_save_state(pdev); @@ -3954,7 +3957,7 @@ static void iavf_remove(struct pci_dev *pdev) dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 8eb8d4663a81..8c3f0f77cb57 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1735,7 +1735,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); if (adapter->state == __IAVF_DOWN_PENDING) { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } break; From 59756ad6948be91d66867ce458083b820c59b8ba Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 19 Aug 2021 08:47:49 +0000 Subject: [PATCH 305/440] iavf: Add __IAVF_INIT_FAILED state This commit adds a new state, __IAVF_INIT_FAILED to the state machine. From now on initialization functions report errors not by returning an error value, but by changing the state to indicate that something went wrong. Signed-off-by: Jakub Pawlak Signed-off-by: Jan Sokolowski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 35 ++++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index cd359cfe5cce..d27821d5b4ad 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -177,6 +177,7 @@ enum iavf_state_t { __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ __IAVF_INIT_SW, /* got resources, setting up structs */ + __IAVF_INIT_FAILED, /* init failed, restarting procedure */ __IAVF_RESETTING, /* in reset */ __IAVF_COMM_FAILED, /* communication with PF failed */ /* Below here, watchdog is running */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3df018c879e1..291473ba70af 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -14,7 +14,7 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter); static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter); static int iavf_close(struct net_device *netdev); -static int iavf_init_get_resources(struct iavf_adapter *adapter); +static void iavf_init_get_resources(struct iavf_adapter *adapter); static int iavf_check_reset_complete(struct iavf_hw *hw); char iavf_driver_name[] = "iavf"; @@ -1688,9 +1688,9 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) * * Function process __IAVF_STARTUP driver state. * When success the state is changed to __IAVF_INIT_VERSION_CHECK - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_startup(struct iavf_adapter *adapter) +static void iavf_startup(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; @@ -1730,8 +1730,9 @@ static int iavf_startup(struct iavf_adapter *adapter) goto err; } iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK); + return; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -1740,9 +1741,9 @@ err: * * Function process __IAVF_INIT_VERSION_CHECK driver state. * When success the state is changed to __IAVF_INIT_GET_RESOURCES - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_init_version_check(struct iavf_adapter *adapter) +static void iavf_init_version_check(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; @@ -1777,8 +1778,9 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) goto err; } iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES); + return; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -1788,9 +1790,9 @@ err: * Function process __IAVF_INIT_GET_RESOURCES driver state and * finishes driver initialization procedure. * When success the state is changed to __IAVF_DOWN - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_init_get_resources(struct iavf_adapter *adapter) +static void iavf_init_get_resources(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; @@ -1818,7 +1820,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) */ iavf_shutdown_adminq(hw); dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); - return 0; + return; } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err); @@ -1910,7 +1912,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) else iavf_init_rss(adapter); - return err; + return; err_mem: iavf_free_rss(adapter); err_register: @@ -1921,7 +1923,7 @@ err_alloc: kfree(adapter->vf_res); adapter->vf_res = NULL; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -3658,15 +3660,18 @@ static void iavf_init_task(struct work_struct *work) } switch (adapter->state) { case __IAVF_STARTUP: - if (iavf_startup(adapter) < 0) + iavf_startup(adapter); + if (adapter->state == __IAVF_INIT_FAILED) goto init_failed; break; case __IAVF_INIT_VERSION_CHECK: - if (iavf_init_version_check(adapter) < 0) + iavf_init_version_check(adapter); + if (adapter->state == __IAVF_INIT_FAILED) goto init_failed; break; case __IAVF_INIT_GET_RESOURCES: - if (iavf_init_get_resources(adapter) < 0) + iavf_init_get_resources(adapter); + if (adapter->state == __IAVF_INIT_FAILED) goto init_failed; goto out; default: From 898ef1cb1cb24040c3e89263e02c605af70c776a Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 19 Aug 2021 08:47:58 +0000 Subject: [PATCH 306/440] iavf: Combine init and watchdog state machines Use single state machine for driver initialization and for service initialized driver. The init state machine implemented in init_task() is merged into the watchdog_task(). The init_task() function is removed. Signed-off-by: Jakub Pawlak Signed-off-by: Jan Sokolowski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 - drivers/net/ethernet/intel/iavf/iavf_main.c | 136 ++++++++------------ 2 files changed, 57 insertions(+), 80 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index d27821d5b4ad..e0b88ff76466 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -226,7 +226,6 @@ struct iavf_adapter { struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work client_task; - struct delayed_work init_task; wait_queue_head_t down_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 291473ba70af..56956e449c97 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1944,7 +1944,48 @@ static void iavf_watchdog_task(struct work_struct *work) if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); + if (adapter->flags & IAVF_FLAG_RESET_NEEDED && + adapter->state != __IAVF_RESETTING) { + iavf_change_state(adapter, __IAVF_RESETTING); + adapter->aq_required = 0; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + } + switch (adapter->state) { + case __IAVF_STARTUP: + iavf_startup(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + return; + case __IAVF_INIT_VERSION_CHECK: + iavf_init_version_check(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + return; + case __IAVF_INIT_GET_RESOURCES: + iavf_init_get_resources(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, + msecs_to_jiffies(1)); + return; + case __IAVF_INIT_FAILED: + if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { + dev_err(&adapter->pdev->dev, + "Failed to communicate with PF; waiting before retry\n"); + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + iavf_shutdown_adminq(hw); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, + &adapter->watchdog_task, (5 * HZ)); + return; + } + /* Try again from failed step*/ + iavf_change_state(adapter, adapter->last_state); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ); + return; case __IAVF_COMM_FAILED: reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; @@ -1953,23 +1994,19 @@ static void iavf_watchdog_task(struct work_struct *work) /* A chance for redemption! */ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - iavf_change_state(adapter, __IAVF_STARTUP); - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - queue_delayed_work(iavf_wq, &adapter->init_task, 10); - mutex_unlock(&adapter->crit_lock); - /* Don't reschedule the watchdog, since we've restarted - * the init task. When init_task contacts the PF and + /* When init task contacts the PF and * gets everything set up again, it'll restart the * watchdog for us. Down, boy. Sit. Stay. Woof. */ - return; + iavf_change_state(adapter, __IAVF_STARTUP); + adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); - goto watchdog_done; + return; case __IAVF_RESETTING: mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); @@ -1992,12 +2029,14 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->state == __IAVF_RUNNING) iavf_request_stats(adapter); } + if (adapter->state == __IAVF_RUNNING) + iavf_detect_recover_hung(&adapter->vsi); break; case __IAVF_REMOVE: mutex_unlock(&adapter->crit_lock); return; default: - goto restart_watchdog; + return; } /* check for hw reset */ @@ -2009,22 +2048,21 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); queue_work(iavf_wq, &adapter->reset_task); - goto watchdog_done; + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, + &adapter->watchdog_task, HZ * 2); + return; } schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); -watchdog_done: - if (adapter->state == __IAVF_RUNNING || - adapter->state == __IAVF_COMM_FAILED) - iavf_detect_recover_hung(&adapter->vsi); mutex_unlock(&adapter->crit_lock); restart_watchdog: + queue_work(iavf_wq, &adapter->adminq_task); if (adapter->aq_required) queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(20)); else queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); - queue_work(iavf_wq, &adapter->adminq_task); } static void iavf_disable_vf(struct iavf_adapter *adapter) @@ -2310,6 +2348,8 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (running) + iavf_change_state(adapter, __IAVF_RUNNING); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } @@ -3635,67 +3675,6 @@ int iavf_process_config(struct iavf_adapter *adapter) return 0; } -/** - * iavf_init_task - worker thread to perform delayed initialization - * @work: pointer to work_struct containing our data - * - * This task completes the work that was begun in probe. Due to the nature - * of VF-PF communications, we may need to wait tens of milliseconds to get - * responses back from the PF. Rather than busy-wait in probe and bog down the - * whole system, we'll do it in a task so we can sleep. - * This task only runs during driver init. Once we've established - * communications with the PF driver and set up our netdev, the watchdog - * takes over. - **/ -static void iavf_init_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - init_task.work); - struct iavf_hw *hw = &adapter->hw; - - if (iavf_lock_timeout(&adapter->crit_lock, 5000)) { - dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); - return; - } - switch (adapter->state) { - case __IAVF_STARTUP: - iavf_startup(adapter); - if (adapter->state == __IAVF_INIT_FAILED) - goto init_failed; - break; - case __IAVF_INIT_VERSION_CHECK: - iavf_init_version_check(adapter); - if (adapter->state == __IAVF_INIT_FAILED) - goto init_failed; - break; - case __IAVF_INIT_GET_RESOURCES: - iavf_init_get_resources(adapter); - if (adapter->state == __IAVF_INIT_FAILED) - goto init_failed; - goto out; - default: - goto init_failed; - } - - queue_delayed_work(iavf_wq, &adapter->init_task, - msecs_to_jiffies(30)); - goto out; -init_failed: - if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { - dev_err(&adapter->pdev->dev, - "Failed to communicate with PF; waiting before retry\n"); - adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; - iavf_shutdown_adminq(hw); - iavf_change_state(adapter, __IAVF_STARTUP); - queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); - goto out; - } - queue_delayed_work(iavf_wq, &adapter->init_task, HZ); -out: - mutex_unlock(&adapter->crit_lock); -} - /** * iavf_shutdown - Shutdown the device in preparation for a reboot * @pdev: pci device structure @@ -3830,8 +3809,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->adminq_task, iavf_adminq_task); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); - queue_delayed_work(iavf_wq, &adapter->init_task, + queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ @@ -3937,8 +3915,8 @@ static void iavf_remove(struct pci_dev *pdev) int err; /* Indicate we are in remove and not to run reset_task */ mutex_lock(&adapter->remove_lock); - cancel_delayed_work_sync(&adapter->init_task); cancel_work_sync(&adapter->reset_task); + cancel_delayed_work_sync(&adapter->watchdog_task); cancel_delayed_work_sync(&adapter->client_task); if (adapter->netdev_registered) { unregister_netdev(netdev); From f616447034a120b18f6e612814641e7d8f5d7f0a Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 16 Oct 2021 07:58:15 +0200 Subject: [PATCH 307/440] MAINTAINERS: adjust file entry for of_net.c after movement Commit e330fb14590c ("of: net: move of_net under net/") moves of_net.c to ./net/core/, but misses to adjust the reference to this file in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/of/of_net.c Adjust the file entry after this file movement. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211016055815.14397-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 703aa3150a15..e107a81b0403 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7012,7 +7012,6 @@ F: drivers/net/mdio/fwnode_mdio.c F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ -F: drivers/of/of_net.c F: include/dt-bindings/net/qca-ar803x.h F: include/linux/*mdio*.h F: include/linux/mdio/*.h @@ -7024,6 +7023,7 @@ F: include/linux/platform_data/mdio-gpio.h F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h F: include/uapi/linux/mii.h +F: net/core/of_net.c EXFAT FILE SYSTEM M: Namjae Jeon From 891e861efb1d37428d6d79214b775f2809f5f348 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Fri, 15 Oct 2021 15:41:16 +0300 Subject: [PATCH 308/440] qed: Optimize the ll2 ooo flow Optimize the ll2 TCP out-of-order likely flows: - Optimize the non-error flows of the ll2 ooo data path. - Optimize "QED_OOO_RIGHT_BUF" over "QED_OOO_LEFT_BUF". Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Link: https://lore.kernel.org/r/20211015124118.29041-1-smalin@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 63 ++++++++++++----------- drivers/net/ethernet/qlogic/qed/qed_ooo.c | 20 +++---- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 3fedcefc36d8..6068b0b94ea3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -378,7 +378,7 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) num_bds_in_packet = p_pkt->bd_used; list_del(&p_pkt->list_entry); - if (num_bds < num_bds_in_packet) { + if (unlikely(num_bds < num_bds_in_packet)) { DP_NOTICE(p_hwfn, "Rest of BDs does not cover whole packet\n"); goto out; @@ -488,7 +488,7 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn, if (!list_empty(&p_rx->active_descq)) p_pkt = list_first_entry(&p_rx->active_descq, struct qed_ll2_rx_packet, list_entry); - if (!p_pkt) { + if (unlikely(!p_pkt)) { DP_NOTICE(p_hwfn, "[%d] LL2 Rx completion but active_descq is empty\n", p_ll2_conn->input.conn_type); @@ -501,7 +501,7 @@ qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn, qed_ll2_rxq_parse_reg(p_hwfn, p_cqe, &data); else qed_ll2_rxq_parse_gsi(p_hwfn, p_cqe, &data); - if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + if (unlikely(qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)) DP_NOTICE(p_hwfn, "Mismatch between active_descq and the LL2 Rx chain\n"); @@ -671,7 +671,7 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, &cqe->rx_cqe_sp)) continue; - if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) { + if (unlikely(cqe_type != CORE_RX_CQE_TYPE_REGULAR)) { DP_NOTICE(p_hwfn, "Got a non-regular LB LL2 completion [type 0x%02x]\n", cqe_type); @@ -698,7 +698,7 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, continue; /* Now process create/add/join isles */ - if (list_empty(&p_rx->active_descq)) { + if (unlikely(list_empty(&p_rx->active_descq))) { DP_NOTICE(p_hwfn, "LL2 OOO RX chain has no submitted buffers\n" ); @@ -708,12 +708,12 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, p_pkt = list_first_entry(&p_rx->active_descq, struct qed_ll2_rx_packet, list_entry); - if ((iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE) || - (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT) || - (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT) || - (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN) || - (iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) { - if (!p_pkt) { + if (likely(iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE || + iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT || + iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT || + iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN || + iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) { + if (unlikely(!p_pkt)) { DP_NOTICE(p_hwfn, "LL2 OOO RX packet is not valid\n"); return -EIO; @@ -885,16 +885,16 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; - if (!p_ll2_conn) + if (unlikely(!p_ll2_conn)) return 0; - if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) + if (unlikely(!QED_LL2_TX_REGISTERED(p_ll2_conn))) return 0; new_idx = le16_to_cpu(*p_tx->p_fw_cons); num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); - if (!num_bds) + if (unlikely(!num_bds)) return 0; while (num_bds) { @@ -903,10 +903,10 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) p_pkt = list_first_entry(&p_tx->active_descq, struct qed_ll2_tx_packet, list_entry); - if (!p_pkt) + if (unlikely(!p_pkt)) return -EINVAL; - if (p_pkt->bd_used != 1) { + if (unlikely(p_pkt->bd_used != 1)) { DP_NOTICE(p_hwfn, "Unexpectedly many BDs(%d) in TX OOO completion\n", p_pkt->bd_used); @@ -1034,7 +1034,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; - if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) + if (likely(p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)) p_ll2_conn->tx_stats_en = 0; else p_ll2_conn->tx_stats_en = 1; @@ -1885,8 +1885,8 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, } start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); - if (QED_IS_IWARP_PERSONALITY(p_hwfn) && - p_ll2->input.conn_type == QED_LL2_TYPE_OOO) { + if (likely(QED_IS_IWARP_PERSONALITY(p_hwfn) && + p_ll2->input.conn_type == QED_LL2_TYPE_OOO)) { start_bd->nw_vlan_or_lb_echo = cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE); } else { @@ -2007,28 +2007,29 @@ int qed_ll2_prepare_tx_packet(void *cxt, int rc = 0; p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); - if (!p_ll2_conn) + if (unlikely(!p_ll2_conn)) return -EINVAL; p_tx = &p_ll2_conn->tx_queue; p_tx_chain = &p_tx->txq_chain; - if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet) + if (unlikely(pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet)) return -EIO; spin_lock_irqsave(&p_tx->lock, flags); - if (p_tx->cur_send_packet) { + if (unlikely(p_tx->cur_send_packet)) { rc = -EEXIST; goto out; } /* Get entry, but only if we have tx elements for it */ - if (!list_empty(&p_tx->free_descq)) + if (unlikely(!list_empty(&p_tx->free_descq))) p_curp = list_first_entry(&p_tx->free_descq, struct qed_ll2_tx_packet, list_entry); - if (p_curp && qed_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds) + if (unlikely(p_curp && + qed_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds)) p_curp = NULL; - if (!p_curp) { + if (unlikely(!p_curp)) { rc = -EBUSY; goto out; } @@ -2057,16 +2058,16 @@ int qed_ll2_set_fragment_of_tx_packet(void *cxt, unsigned long flags; p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); - if (!p_ll2_conn) + if (unlikely(!p_ll2_conn)) return -EINVAL; - if (!p_ll2_conn->tx_queue.cur_send_packet) + if (unlikely(!p_ll2_conn->tx_queue.cur_send_packet)) return -EINVAL; p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; - if (cur_send_frag_num >= p_cur_send_packet->bd_used) + if (unlikely(cur_send_frag_num >= p_cur_send_packet->bd_used)) return -EINVAL; /* Fill the BD information, and possibly notify FW */ @@ -2693,7 +2694,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, */ nr_frags = skb_shinfo(skb)->nr_frags; - if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + if (unlikely(1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET)) { DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", 1 + nr_frags); return -EINVAL; @@ -2735,7 +2736,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, */ rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle, &pkt, 1); - if (rc) + if (unlikely(rc)) goto err; for (i = 0; i < nr_frags; i++) { @@ -2759,7 +2760,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, /* if failed not much to do here, partial packet has been posted * we can't free memory, will need to wait for completion */ - if (rc) + if (unlikely(rc)) goto err2; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index b8c5641b29a8..5d725f59db24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -26,12 +26,12 @@ static struct qed_ooo_archipelago u32 idx = (cid & 0xffff) - p_ooo_info->cid_base; struct qed_ooo_archipelago *p_archipelago; - if (idx >= p_ooo_info->max_num_archipelagos) + if (unlikely(idx >= p_ooo_info->max_num_archipelagos)) return NULL; p_archipelago = &p_ooo_info->p_archipelagos_mem[idx]; - if (list_empty(&p_archipelago->isles_list)) + if (unlikely(list_empty(&p_archipelago->isles_list))) return NULL; return p_archipelago; @@ -46,7 +46,7 @@ static struct qed_ooo_isle *qed_ooo_seek_isle(struct qed_hwfn *p_hwfn, u8 the_num_of_isle = 1; p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid); - if (!p_archipelago) { + if (unlikely(!p_archipelago)) { DP_NOTICE(p_hwfn, "Connection %d is not found in OOO list\n", cid); return NULL; @@ -362,7 +362,7 @@ void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn, if (ooo_isle > 1) { p_prev_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle - 1); - if (!p_prev_isle) { + if (unlikely(!p_prev_isle)) { DP_NOTICE(p_hwfn, "Isle %d is not found(cid %d)\n", ooo_isle - 1, cid); @@ -370,7 +370,7 @@ void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn, } } p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid); - if (!p_archipelago && (ooo_isle != 1)) { + if (unlikely(!p_archipelago && ooo_isle != 1)) { DP_NOTICE(p_hwfn, "Connection %d is not found in OOO list\n", cid); return; @@ -381,7 +381,7 @@ void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn, struct qed_ooo_isle, list_entry); list_del(&p_isle->list_entry); - if (!list_empty(&p_isle->buffers_list)) { + if (unlikely(!list_empty(&p_isle->buffers_list))) { DP_NOTICE(p_hwfn, "Free isle is not empty\n"); INIT_LIST_HEAD(&p_isle->buffers_list); } @@ -418,13 +418,13 @@ void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn, struct qed_ooo_isle *p_isle = NULL; p_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle); - if (!p_isle) { + if (unlikely(!p_isle)) { DP_NOTICE(p_hwfn, "Isle %d is not found(cid %d)\n", ooo_isle, cid); return; } - if (buffer_side == QED_OOO_LEFT_BUF) + if (unlikely(buffer_side == QED_OOO_LEFT_BUF)) list_add(&p_buffer->list_entry, &p_isle->buffers_list); else list_add_tail(&p_buffer->list_entry, &p_isle->buffers_list); @@ -438,7 +438,7 @@ void qed_ooo_join_isles(struct qed_hwfn *p_hwfn, p_right_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, left_isle + 1); - if (!p_right_isle) { + if (unlikely(!p_right_isle)) { DP_NOTICE(p_hwfn, "Right isle %d is not found(cid %d)\n", left_isle + 1, cid); @@ -450,7 +450,7 @@ void qed_ooo_join_isles(struct qed_hwfn *p_hwfn, if (left_isle) { p_left_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, left_isle); - if (!p_left_isle) { + if (unlikely(!p_left_isle)) { DP_NOTICE(p_hwfn, "Left isle %d is not found(cid %d)\n", left_isle, cid); From 939a6567f976efb8b3e6d601ce35eb56b17babd0 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Fri, 15 Oct 2021 15:41:17 +0300 Subject: [PATCH 309/440] qed: Change the TCP common variable - "iscsi_ooo" Change the TCP common variable - "iscsi_ooo" to "ooo_opq". This variable is common between all the TCP L5 protocols and not specific to iSCSI. Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Link: https://lore.kernel.org/r/20211015124118.29041-2-smalin@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 50 +++++++++++------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 6068b0b94ea3..ed274f033626 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -623,18 +623,18 @@ static bool qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn, struct core_rx_slow_path_cqe *p_cqe) { - struct ooo_opaque *iscsi_ooo; + struct ooo_opaque *ooo_opq; u32 cid; if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) return false; - iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data; - if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES) + ooo_opq = (struct ooo_opaque *)&p_cqe->opaque_data; + if (ooo_opq->ooo_opcode != TCP_EVENT_DELETE_ISLES) return false; /* Need to make a flush */ - cid = le32_to_cpu(iscsi_ooo->cid); + cid = le32_to_cpu(ooo_opq->cid); qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid); return true; @@ -650,7 +650,7 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, union core_rx_cqe_union *cqe = NULL; u16 cq_new_idx = 0, cq_old_idx = 0; struct qed_ooo_buffer *p_buffer; - struct ooo_opaque *iscsi_ooo; + struct ooo_opaque *ooo_opq; u8 placement_offset = 0; u8 cqe_type; @@ -683,18 +683,17 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, parse_flags = le16_to_cpu(p_cqe_fp->parse_flags.flags); packet_length = le16_to_cpu(p_cqe_fp->packet_length); vlan = le16_to_cpu(p_cqe_fp->vlan); - iscsi_ooo = (struct ooo_opaque *)&p_cqe_fp->opaque_data; - qed_ooo_save_history_entry(p_hwfn, p_hwfn->p_ooo_info, - iscsi_ooo); - cid = le32_to_cpu(iscsi_ooo->cid); + ooo_opq = (struct ooo_opaque *)&p_cqe_fp->opaque_data; + qed_ooo_save_history_entry(p_hwfn, p_hwfn->p_ooo_info, ooo_opq); + cid = le32_to_cpu(ooo_opq->cid); /* Process delete isle first */ - if (iscsi_ooo->drop_size) + if (ooo_opq->drop_size) qed_ooo_delete_isles(p_hwfn, p_hwfn->p_ooo_info, cid, - iscsi_ooo->drop_isle, - iscsi_ooo->drop_size); + ooo_opq->drop_isle, + ooo_opq->drop_size); - if (iscsi_ooo->ooo_opcode == TCP_EVENT_NOP) + if (ooo_opq->ooo_opcode == TCP_EVENT_NOP) continue; /* Now process create/add/join isles */ @@ -708,11 +707,11 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, p_pkt = list_first_entry(&p_rx->active_descq, struct qed_ll2_rx_packet, list_entry); - if (likely(iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE || - iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT || - iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT || - iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN || - iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) { + if (likely(ooo_opq->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE || + ooo_opq->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT || + ooo_opq->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT || + ooo_opq->ooo_opcode == TCP_EVENT_ADD_PEN || + ooo_opq->ooo_opcode == TCP_EVENT_JOIN)) { if (unlikely(!p_pkt)) { DP_NOTICE(p_hwfn, "LL2 OOO RX packet is not valid\n"); @@ -727,19 +726,19 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, qed_chain_consume(&p_rx->rxq_chain); list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); - switch (iscsi_ooo->ooo_opcode) { + switch (ooo_opq->ooo_opcode) { case TCP_EVENT_ADD_NEW_ISLE: qed_ooo_add_new_isle(p_hwfn, p_hwfn->p_ooo_info, cid, - iscsi_ooo->ooo_isle, + ooo_opq->ooo_isle, p_buffer); break; case TCP_EVENT_ADD_ISLE_RIGHT: qed_ooo_add_new_buffer(p_hwfn, p_hwfn->p_ooo_info, cid, - iscsi_ooo->ooo_isle, + ooo_opq->ooo_isle, p_buffer, QED_OOO_RIGHT_BUF); break; @@ -747,7 +746,7 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, qed_ooo_add_new_buffer(p_hwfn, p_hwfn->p_ooo_info, cid, - iscsi_ooo->ooo_isle, + ooo_opq->ooo_isle, p_buffer, QED_OOO_LEFT_BUF); break; @@ -755,13 +754,12 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, qed_ooo_add_new_buffer(p_hwfn, p_hwfn->p_ooo_info, cid, - iscsi_ooo->ooo_isle + - 1, + ooo_opq->ooo_isle + 1, p_buffer, QED_OOO_LEFT_BUF); qed_ooo_join_isles(p_hwfn, p_hwfn->p_ooo_info, - cid, iscsi_ooo->ooo_isle); + cid, ooo_opq->ooo_isle); break; case TCP_EVENT_ADD_PEN: num_ooo_add_to_peninsula++; @@ -773,7 +771,7 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, } else { DP_NOTICE(p_hwfn, "Unexpected event (%d) TX OOO completion\n", - iscsi_ooo->ooo_opcode); + ooo_opq->ooo_opcode); } } From 4c71ce50d2fe7f1b142113ffa412c9ed6677d52c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 4 Jul 2021 15:16:21 +0300 Subject: [PATCH 310/440] net/mlx5: Support partial TTC rules Add bitmasks to ttc_params to indicate if rule is valid or not. It will allow to create TTC table with support only in part of the traffic types. In later patches which introduce the steering based LAG port selection, TTC will be created with only part of the rules according to the hash type. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 749d17c0057d..b63dec24747a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -247,6 +247,8 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (test_bit(tt, params->ignore_dests)) + continue; rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, ttc_rules[tt].proto); @@ -266,6 +268,8 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, if (!mlx5_tunnel_proto_supported_rx(dev, ttc_tunnel_rules[tt].proto)) continue; + if (test_bit(tt, params->ignore_tunnel_dests)) + continue; trules[tt] = mlx5_generate_ttc_rule(dev, ft, ¶ms->tunnel_dests[tt], ttc_tunnel_rules[tt].etype, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index ce95be8f8382..85fef0cd1c07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -43,7 +43,9 @@ struct ttc_params { struct mlx5_flow_namespace *ns; struct mlx5_flow_table_attr ft_attr; struct mlx5_flow_destination dests[MLX5_NUM_TT]; + DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT); bool inner_ttc; + DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT); struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; }; From 425a563acb1d1872c0036fbcb644247640edbbc6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 23 May 2021 13:34:28 +0300 Subject: [PATCH 311/440] net/mlx5: Introduce port selection namespace Add new port selection flow steering namespace. Flow steering rules in this namespaceare are used to determine the physical port for egress packets. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 1 + .../net/ethernet/mellanox/mlx5/core/fs_core.c | 26 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 7 +++-- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 +++++ .../net/ethernet/mellanox/mlx5/core/main.c | 1 + include/linux/mlx5/device.h | 15 +++++++++++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++-- 8 files changed, 78 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7db8df64a60e..caefdb7dfefe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -969,6 +969,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_NIC_TX: case FS_FT_RDMA_RX: case FS_FT_RDMA_TX: + case FS_FT_PORT_SEL: return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fe501ba88bea..8d8696f7c3f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2191,6 +2191,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->fdb_root_ns) return &steering->fdb_root_ns->ns; return NULL; + case MLX5_FLOW_NAMESPACE_PORT_SEL: + if (steering->port_sel_root_ns) + return &steering->port_sel_root_ns->ns; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_RX: if (steering->sniffer_rx_root_ns) return &steering->sniffer_rx_root_ns->ns; @@ -2596,6 +2600,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) steering->fdb_root_ns = NULL; kfree(steering->fdb_sub_ns); steering->fdb_sub_ns = NULL; + cleanup_root_ns(steering->port_sel_root_ns); cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); cleanup_root_ns(steering->rdma_rx_root_ns); @@ -2634,6 +2639,21 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) return PTR_ERR_OR_ZERO(prio); } +#define PORT_SEL_NUM_LEVELS 3 +static int init_port_sel_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL); + if (!steering->port_sel_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0, + PORT_SEL_NUM_LEVELS); + return PTR_ERR_OR_ZERO(prio); +} + static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) { int err; @@ -3020,6 +3040,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } + if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) { + err = init_port_sel_root_ns(steering); + if (err) + goto err; + } + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { err = init_rdma_rx_root_ns(steering); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 98240badc342..79d37530afb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -97,7 +97,8 @@ enum fs_flow_table_type { FS_FT_SNIFFER_TX = 0X6, FS_FT_RDMA_RX = 0X7, FS_FT_RDMA_TX = 0X8, - FS_FT_MAX_TYPE = FS_FT_RDMA_TX, + FS_FT_PORT_SEL = 0X9, + FS_FT_MAX_TYPE = FS_FT_PORT_SEL, }; enum fs_flow_table_op_mod { @@ -129,6 +130,7 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *rdma_tx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; + struct mlx5_flow_root_namespace *port_sel_root_ns; int esw_egress_acl_vports; int esw_ingress_acl_vports; }; @@ -341,7 +343,8 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node); (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ (type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \ (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \ - (BUILD_BUG_ON_ZERO(FS_FT_RDMA_TX != FS_FT_MAX_TYPE))\ + (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\ ) #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index f4f8993eac17..1037e3629e7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -149,6 +149,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; + if (MLX5_CAP_GEN(dev, port_selection_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, hca_cap_2)) { err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 47d92fb459ed..f8446395163a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1416,6 +1416,7 @@ static const int types[] = { MLX5_CAP_TLS, MLX5_CAP_VDPA_EMULATION, MLX5_CAP_IPSEC, + MLX5_CAP_PORT_SELECTION, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 109cc8106d16..347167c18802 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1185,6 +1185,7 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_GENERAL_2 = 0x20, + MLX5_CAP_PORT_SELECTION = 0x25, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1342,6 +1343,20 @@ enum mlx5_qcam_feature_groups { MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap) +#define MLX5_CAP_PORT_SELECTION(mdev, cap) \ + MLX5_GET(port_selection_cap, \ + mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap) + +#define MLX5_CAP_PORT_SELECTION_MAX(mdev, cap) \ + MLX5_GET(port_selection_cap, \ + mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->max, cap) + +#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ + MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) + +#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \ + MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0106c67e8ccb..259fcc168340 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -83,6 +83,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_RX, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL, MLX5_FLOW_NAMESPACE_RDMA_TX, + MLX5_FLOW_NAMESPACE_PORT_SEL, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c614ad1da44d..db1d9c69c1fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -767,6 +767,18 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_at_20c0[0x5f40]; }; +struct mlx5_ifc_port_selection_cap_bits { + u8 reserved_at_0[0x10]; + u8 port_select_flow_table[0x1]; + u8 reserved_at_11[0xf]; + + u8 reserved_at_20[0x1e0]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_port_selection; + + u8 reserved_at_400[0x7c00]; +}; + enum { MLX5_FDB_TO_VPORT_REG_C_0 = 0x01, MLX5_FDB_TO_VPORT_REG_C_1 = 0x02, @@ -1515,7 +1527,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uar_4k[0x1]; u8 reserved_at_241[0x9]; u8 uar_sz[0x6]; - u8 reserved_at_248[0x2]; + u8 port_selection_cap[0x1]; + u8 reserved_at_248[0x1]; u8 umem_uid_0[0x1]; u8 reserved_at_250[0x5]; u8 log_pg_sz[0x8]; @@ -3164,6 +3177,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; + struct mlx5_ifc_port_selection_cap_bits port_selection_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; struct mlx5_ifc_debug_cap_bits debug_cap; @@ -10434,9 +10448,16 @@ struct mlx5_ifc_dcbx_param_bits { u8 reserved_at_a0[0x160]; }; +enum { + MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT, +}; + struct mlx5_ifc_lagc_bits { u8 fdb_selection_mode[0x1]; - u8 reserved_at_1[0x1c]; + u8 reserved_at_1[0x14]; + u8 port_select_mode[0x3]; + u8 reserved_at_18[0x5]; u8 lag_state[0x3]; u8 reserved_at_20[0x14]; From e7e2519e3632396a25031b7e828ed35332e5dd07 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 6 Jul 2021 17:48:26 +0300 Subject: [PATCH 312/440] net/mlx5: Add support to create match definer Introduce new APIs to create and destroy flow matcher for given format id. Flow match definer object is used for defining the fields and mask used for the hash calculation. User should mask the desired fields like done in the match criteria. This object is assigned to flow group of type hash. In this flow group type, packets lookup is done based on the hash result. This patch also adds the required bits to create such flow group. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 57 ++++ .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 4 + .../net/ethernet/mellanox/mlx5/core/fs_core.c | 46 +++ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 5 + .../mellanox/mlx5/core/steering/fs_dr.c | 15 + include/linux/mlx5/fs.h | 8 + include/linux/mlx5/mlx5_ifc.h | 272 ++++++++++++++++-- 7 files changed, 380 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index caefdb7dfefe..2c82dc118460 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -185,6 +185,20 @@ static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); } +static int +mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return 0; +} + +static int +mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return 0; +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) @@ -909,6 +923,45 @@ static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in); } +static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id); + + return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + void *ptr; + int err; + + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context); + MLX5_SET(match_definer, ptr, format_id, format_id); + + ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask); + memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)); + + err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out); + return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +} + static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, .destroy_flow_table = mlx5_cmd_destroy_flow_table, @@ -923,6 +976,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, .modify_header_alloc = mlx5_cmd_modify_header_alloc, .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .create_match_definer = mlx5_cmd_create_match_definer, + .destroy_match_definer = mlx5_cmd_destroy_match_definer, .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, @@ -942,6 +997,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .create_match_definer = mlx5_cmd_stub_create_match_definer, + .destroy_match_definer = mlx5_cmd_stub_destroy_match_definer, .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 5ecd33cdc087..220ec632d35a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -97,6 +97,10 @@ struct mlx5_flow_cmds { int (*create_ns)(struct mlx5_flow_root_namespace *ns); int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); + int (*create_match_definer)(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask); + int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, + int definer_id); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8d8696f7c3f5..873efde0d458 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3250,6 +3250,52 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); +int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer) +{ + return definer->id; +} + +struct mlx5_flow_definer * +mlx5_create_match_definer(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, u16 format_id, + u32 *match_mask) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_definer *definer; + int id; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + definer = kzalloc(sizeof(*definer), GFP_KERNEL); + if (!definer) + return ERR_PTR(-ENOMEM); + + definer->ns_type = ns_type; + id = root->cmds->create_match_definer(root, format_id, match_mask); + if (id < 0) { + mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id); + kfree(definer); + return ERR_PTR(id); + } + definer->id = id; + return definer; +} + +void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, + struct mlx5_flow_definer *definer) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, definer->ns_type); + if (WARN_ON(!root)) + return; + + root->cmds->destroy_match_definer(root, definer->id); + kfree(definer); +} + int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 79d37530afb3..7711db245c63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -49,6 +49,11 @@ #define FDB_TC_MAX_PRIO 16 #define FDB_TC_LEVELS_PER_PRIO 2 +struct mlx5_flow_definer { + enum mlx5_flow_namespace_type ns_type; + u32 id; +}; + struct mlx5_modify_hdr { enum mlx5_flow_namespace_type ns_type; union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 230e920e3845..2632d5ae9bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -625,6 +625,19 @@ static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *n mlx5dr_action_destroy(modify_hdr->action.dr_action); } +static int +mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return -EOPNOTSUPP; +} + static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) @@ -727,6 +740,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .create_match_definer = mlx5_cmd_dr_create_match_definer, + .destroy_match_definer = mlx5_cmd_dr_destroy_match_definer, .set_peer = mlx5_cmd_dr_set_peer, .create_ns = mlx5_cmd_dr_create_ns, .destroy_ns = mlx5_cmd_dr_destroy_ns, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 259fcc168340..7a43fec63a35 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -98,6 +98,7 @@ enum { struct mlx5_pkt_reformat; struct mlx5_modify_hdr; +struct mlx5_flow_definer; struct mlx5_flow_table; struct mlx5_flow_group; struct mlx5_flow_namespace; @@ -258,6 +259,13 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, void *modify_actions); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, struct mlx5_modify_hdr *modify_hdr); +struct mlx5_flow_definer * +mlx5_create_match_definer(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, u16 format_id, + u32 *match_mask); +void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, + struct mlx5_flow_definer *definer); +int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer); struct mlx5_pkt_reformat_params { int type; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index db1d9c69c1fa..8f41145bc6ef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -94,6 +94,7 @@ enum { enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, + MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, MLX5_OBJ_TYPE_PSV = 0xff03, @@ -1731,7 +1732,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 flex_parser_id_outer_first_mpls_over_gre[0x4]; u8 flex_parser_id_outer_first_mpls_over_udp_label[0x4]; - u8 reserved_at_6e0[0x10]; + u8 max_num_match_definer[0x10]; u8 sf_base_id[0x10]; u8 flex_parser_id_gtpu_dw_2[0x4]; @@ -1746,7 +1747,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_760[0x20]; u8 vhca_tunnel_commands[0x40]; - u8 reserved_at_7c0[0x40]; + u8 match_definer_format_supported[0x40]; }; struct mlx5_ifc_cmd_hca_cap_2_bits { @@ -5666,6 +5667,236 @@ struct mlx5_ifc_query_fte_in_bits { u8 reserved_at_120[0xe0]; }; +struct mlx5_ifc_match_definer_format_0_bits { + u8 reserved_at_0[0x100]; + + u8 metadata_reg_c_0[0x20]; + + u8 metadata_reg_c_1[0x20]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_dmac_15_0[0x10]; + u8 outer_ethertype[0x10]; + + u8 reserved_at_180[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 outer_ip_frag[0x1]; + u8 outer_qp_type[0x2]; + u8 outer_encap_type[0x2]; + u8 port_number[0x2]; + u8 outer_l3_type[0x2]; + u8 outer_l4_type[0x2]; + u8 outer_first_vlan_type[0x2]; + u8 outer_first_vlan_prio[0x3]; + u8 outer_first_vlan_cfi[0x1]; + u8 outer_first_vlan_vid[0xc]; + + u8 outer_l4_type_ext[0x4]; + u8 reserved_at_1a4[0x2]; + u8 outer_ipsec_layer[0x2]; + u8 outer_l2_type[0x2]; + u8 force_lb[0x1]; + u8 outer_l2_ok[0x1]; + u8 outer_l3_ok[0x1]; + u8 outer_l4_ok[0x1]; + u8 outer_second_vlan_type[0x2]; + u8 outer_second_vlan_prio[0x3]; + u8 outer_second_vlan_cfi[0x1]; + u8 outer_second_vlan_vid[0xc]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 inner_ipv4_checksum_ok[0x1]; + u8 inner_l4_checksum_ok[0x1]; + u8 outer_ipv4_checksum_ok[0x1]; + u8 outer_l4_checksum_ok[0x1]; + u8 inner_l3_ok[0x1]; + u8 inner_l4_ok[0x1]; + u8 outer_l3_ok_duplicate[0x1]; + u8 outer_l4_ok_duplicate[0x1]; + u8 outer_tcp_cwr[0x1]; + u8 outer_tcp_ece[0x1]; + u8 outer_tcp_urg[0x1]; + u8 outer_tcp_ack[0x1]; + u8 outer_tcp_psh[0x1]; + u8 outer_tcp_rst[0x1]; + u8 outer_tcp_syn[0x1]; + u8 outer_tcp_fin[0x1]; +}; + +struct mlx5_ifc_match_definer_format_22_bits { + u8 reserved_at_0[0x100]; + + u8 outer_ip_src_addr[0x20]; + + u8 outer_ip_dest_addr[0x20]; + + u8 outer_l4_sport[0x10]; + u8 outer_l4_dport[0x10]; + + u8 reserved_at_160[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 outer_ip_frag[0x1]; + u8 outer_qp_type[0x2]; + u8 outer_encap_type[0x2]; + u8 port_number[0x2]; + u8 outer_l3_type[0x2]; + u8 outer_l4_type[0x2]; + u8 outer_first_vlan_type[0x2]; + u8 outer_first_vlan_prio[0x3]; + u8 outer_first_vlan_cfi[0x1]; + u8 outer_first_vlan_vid[0xc]; + + u8 metadata_reg_c_0[0x20]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 outer_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_23_bits { + u8 reserved_at_0[0x100]; + + u8 inner_ip_src_addr[0x20]; + + u8 inner_ip_dest_addr[0x20]; + + u8 inner_l4_sport[0x10]; + u8 inner_l4_dport[0x10]; + + u8 reserved_at_160[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 inner_ip_frag[0x1]; + u8 inner_qp_type[0x2]; + u8 inner_encap_type[0x2]; + u8 port_number[0x2]; + u8 inner_l3_type[0x2]; + u8 inner_l4_type[0x2]; + u8 inner_first_vlan_type[0x2]; + u8 inner_first_vlan_prio[0x3]; + u8 inner_first_vlan_cfi[0x1]; + u8 inner_first_vlan_vid[0xc]; + + u8 tunnel_header_0[0x20]; + + u8 inner_dmac_47_16[0x20]; + + u8 inner_smac_47_16[0x20]; + + u8 inner_smac_15_0[0x10]; + u8 inner_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_29_bits { + u8 reserved_at_0[0xc0]; + + u8 outer_ip_dest_addr[0x80]; + + u8 outer_ip_src_addr[0x80]; + + u8 outer_l4_sport[0x10]; + u8 outer_l4_dport[0x10]; + + u8 reserved_at_1e0[0x20]; +}; + +struct mlx5_ifc_match_definer_format_30_bits { + u8 reserved_at_0[0xa0]; + + u8 outer_ip_dest_addr[0x80]; + + u8 outer_ip_src_addr[0x80]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 outer_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_31_bits { + u8 reserved_at_0[0xc0]; + + u8 inner_ip_dest_addr[0x80]; + + u8 inner_ip_src_addr[0x80]; + + u8 inner_l4_sport[0x10]; + u8 inner_l4_dport[0x10]; + + u8 reserved_at_1e0[0x20]; +}; + +struct mlx5_ifc_match_definer_format_32_bits { + u8 reserved_at_0[0xa0]; + + u8 inner_ip_dest_addr[0x80]; + + u8 inner_ip_src_addr[0x80]; + + u8 inner_dmac_47_16[0x20]; + + u8 inner_smac_47_16[0x20]; + + u8 inner_smac_15_0[0x10]; + u8 inner_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x10]; + u8 format_id[0x10]; + + u8 reserved_at_a0[0x160]; + + u8 match_mask[16][0x20]; +}; + +struct mlx5_ifc_general_obj_in_cmd_hdr_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 vhca_tunnel_id[0x10]; + u8 obj_type[0x10]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_general_obj_out_cmd_hdr_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_match_definer_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_match_definer_bits obj_context; +}; + +struct mlx5_ifc_create_match_definer_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -8139,6 +8370,11 @@ struct mlx5_ifc_create_flow_group_out_bits { u8 reserved_at_60[0x20]; }; +enum { + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_TCAM_SUBTABLE = 0x0, + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT = 0x1, +}; + enum { MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -8160,7 +8396,9 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_60[0x20]; u8 table_type[0x8]; - u8 reserved_at_88[0x18]; + u8 reserved_at_88[0x4]; + u8 group_type[0x4]; + u8 reserved_at_90[0x10]; u8 reserved_at_a0[0x8]; u8 table_id[0x18]; @@ -8175,7 +8413,10 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 end_flow_index[0x20]; - u8 reserved_at_140[0xa0]; + u8 reserved_at_140[0x10]; + u8 match_definer_id[0x10]; + + u8 reserved_at_160[0x80]; u8 reserved_at_1e0[0x18]; u8 match_criteria_enable[0x8]; @@ -10671,29 +10912,6 @@ struct mlx5_ifc_dealloc_memic_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_general_obj_in_cmd_hdr_bits { - u8 opcode[0x10]; - u8 uid[0x10]; - - u8 vhca_tunnel_id[0x10]; - u8 obj_type[0x10]; - - u8 obj_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_general_obj_out_cmd_hdr_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 obj_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - struct mlx5_ifc_umem_bits { u8 reserved_at_0[0x80]; From 58a606dba708f114e7f412b5c3024027e8a73e34 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 3 Aug 2021 10:04:41 +0300 Subject: [PATCH 313/440] net/mlx5: Introduce new uplink destination type The uplink destination type should be used in rules to steer the packet to the uplink when the device is in steering based LAG mode. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 8 +++++++- include/linux/mlx5/mlx5_ifc.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 87d65f6b5310..7841ef6c193c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -235,6 +235,9 @@ const char *parse_fs_dst(struct trace_seq *p, const char *ret = trace_seq_buffer_ptr(p); switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + trace_seq_printf(p, "uplink\n"); + break; case MLX5_FLOW_DESTINATION_TYPE_VPORT: trace_seq_printf(p, "vport=%u\n", dst->vport.num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 2c82dc118460..750b21124a1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -577,8 +577,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = dst->dest_attr.ft->id; break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: - id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, !!(dst->dest_attr.vport.flags & @@ -586,6 +586,12 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); + if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) { + /* destination_id is reserved */ + id = 0; + break; + } + id = dst->dest_attr.vport.num; if (extended_dest && dst->dest_attr.vport.pkt_reformat) { MLX5_SET(dest_format_struct, in_dests, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8f41145bc6ef..09e43019d877 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1766,6 +1766,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, + MLX5_FLOW_DESTINATION_TYPE_UPLINK = 0x8, MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, From 3d677735d3b7f00f42cfd57a51d3d2109cf65f87 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 5 Jul 2021 15:34:00 +0300 Subject: [PATCH 314/440] net/mlx5: Lag, move lag files into directory Downstream patches add another lag related file so it makes sense to have all the lag files in a dedicated directory. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.h | 2 +- .../net/ethernet/mellanox/mlx5/core/{lag_mp.c => lag/mp.c} | 4 ++-- .../net/ethernet/mellanox/mlx5/core/{lag_mp.h => lag/mp.h} | 0 5 files changed, 6 insertions(+), 6 deletions(-) rename drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.c (99%) rename drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.h (98%) rename drivers/net/ethernet/mellanox/mlx5/core/{lag_mp.c => lag/mp.c} (99%) rename drivers/net/ethernet/mellanox/mlx5/core/{lag_mp.h => lag/mp.h} (100%) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a151575be51f..fb123e26927d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ - fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ + fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o lib/tout.o @@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o \ en/mapping.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c similarity index 99% rename from drivers/net/ethernet/mellanox/mlx5/core/lag.c rename to drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index f35c8ba48aac..b37724fc5387 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -38,7 +38,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "lag.h" -#include "lag_mp.h" +#include "mp.h" /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h similarity index 98% rename from drivers/net/ethernet/mellanox/mlx5/core/lag.h rename to drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index d4bae528954e..c268663c89b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -5,7 +5,7 @@ #define __MLX5_LAG_H__ #include "mlx5_core.h" -#include "lag_mp.h" +#include "mp.h" enum { MLX5_LAG_P1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c similarity index 99% rename from drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c rename to drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index f239b352a58a..810a15b83b9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -3,8 +3,8 @@ #include #include -#include "lag.h" -#include "lag_mp.h" +#include "lag/lag.h" +#include "lag/mp.h" #include "mlx5_core.h" #include "eswitch.h" #include "lib/mlx5.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h similarity index 100% rename from drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h rename to drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h From 1065e0015dd7d83b3d03a9396ca37d3cca9a5ce5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 13 Jul 2021 15:20:10 +0300 Subject: [PATCH 315/440] net/mlx5: Lag, set LAG traffic type mapping Generate a traffic type bitmap that will define which steering objects we need to create for the steering based LAG. Bits in this bitmap are set according to the LAG hash type. In addition, have a field that indicate if the lag is in encap mode or not. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 2 + .../mellanox/mlx5/core/lag/port_sel.c | 37 +++++++++++++++++++ .../mellanox/mlx5/core/lag/port_sel.h | 14 +++++++ 3 files changed, 53 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index c268663c89b4..670061e60d89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -6,6 +6,7 @@ #include "mlx5_core.h" #include "mp.h" +#include "port_sel.h" enum { MLX5_LAG_P1, @@ -49,6 +50,7 @@ struct mlx5_lag { struct delayed_work bond_work; struct notifier_block nb; struct lag_mp lag_mp; + struct mlx5_lag_port_sel port_sel; }; static inline struct mlx5_lag * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c new file mode 100644 index 000000000000..7b4ad49c8438 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include +#include "lag.h" + +static void set_tt_map(struct mlx5_lag_port_sel *port_sel, + enum netdev_lag_hash hash) +{ + port_sel->tunnel = false; + + switch (hash) { + case NETDEV_LAG_HASH_E34: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L34: + set_bit(MLX5_TT_IPV4_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + case NETDEV_LAG_HASH_E23: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L23: + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + default: + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h new file mode 100644 index 000000000000..c55736d2484d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LAG_FS_H__ +#define __MLX5_LAG_FS_H__ + +#include "lib/fs_ttc.h" + +struct mlx5_lag_port_sel { + DECLARE_BITMAP(tt_map, MLX5_NUM_TT); + bool tunnel; +}; + +#endif /* __MLX5_LAG_FS_H__ */ From e465550b38edd71e233520d01b2d5db91d0ae077 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 13 Jul 2021 15:30:45 +0300 Subject: [PATCH 316/440] net/mlx5: Lag, set match mask according to the traffic type bitmap Set the related bits in the match definer mask according to the TT mapping. This mask will be used to create the match definers. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/lag/port_sel.c | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 7b4ad49c8438..6095f1049bdb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -4,6 +4,188 @@ #include #include "lag.h" +static int mlx5_lag_set_definer_inner(u32 *match_definer_mask, + enum mlx5_traffic_types tt) +{ + int format_id; + u8 *ipv6; + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 31; + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 32; + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_15_0); + break; + default: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + break; + } + + return format_id; +} + +static int mlx5_lag_set_definer(u32 *match_definer_mask, + enum mlx5_traffic_types tt, bool tunnel, + enum netdev_lag_hash hash) +{ + int format_id; + u8 *ipv6; + + if (tunnel) + return mlx5_lag_set_definer_inner(match_definer_mask, tt); + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l3_type); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 29; + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 30; + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_15_0); + break; + default: + format_id = 0; + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_15_0); + + if (hash == NETDEV_LAG_HASH_VLAN_SRCMAC) { + MLX5_SET_TO_ONES(match_definer_format_0, + match_definer_mask, + outer_first_vlan_vid); + break; + } + + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_ethertype); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_15_0); + break; + } + + return format_id; +} + static void set_tt_map(struct mlx5_lag_port_sel *port_sel, enum netdev_lag_hash hash) { From dc48516ec7d369c6b80bf9f14d774287b6c428aa Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 17 Aug 2021 10:24:05 +0300 Subject: [PATCH 317/440] net/mlx5: Lag, add support to create definers for LAG Every definer will consist of a flow table with a single hash group with exactly two flow table entries, one for each device port. The destination of these entries is the uplink vport according to the port state and hash policy. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 + .../mellanox/mlx5/core/lag/port_sel.c | 203 ++++++++++++++++++ .../mellanox/mlx5/core/lag/port_sel.h | 13 ++ 4 files changed, 220 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index b37724fc5387..17baa254f9ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -588,8 +588,10 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, if (!(bond_status & 0x3)) return 0; - if (lag_upper_info) + if (lag_upper_info) { tracker->tx_type = lag_upper_info->tx_type; + tracker->hash_type = lag_upper_info->hash_type; + } /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 670061e60d89..f0e8b3412c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -33,6 +33,7 @@ struct lag_tracker { enum netdev_lag_tx_type tx_type; struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; unsigned int is_bonded:1; + enum netdev_lag_hash hash_type; }; /* LAG data of a ConnectX card. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 6095f1049bdb..06bc7c7dbb6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -4,6 +4,95 @@ #include #include "lag.h" +enum { + MLX5_LAG_FT_LEVEL_DEFINER, +}; + +static struct mlx5_flow_group * +mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_definer *definer) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_definer_id, + mlx5_get_match_definer_id(definer)); + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1); + MLX5_SET(create_flow_group_in, in, group_type, + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); + + fg = mlx5_create_flow_group(ft, in); + kvfree(in); + return fg; +} + +static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer, + u8 port1, u8 port2) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_namespace *ns; + int err, i; + + ft_attr.max_fte = MLX5_MAX_PORTS; + ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); + if (!ns) { + mlx5_core_warn(dev, "Failed to get port selection namespace\n"); + return -EOPNOTSUPP; + } + + lag_definer->ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(lag_definer->ft)) { + mlx5_core_warn(dev, "Failed to create port selection table\n"); + return PTR_ERR(lag_definer->ft); + } + + lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, + lag_definer->definer); + if (IS_ERR(lag_definer->fg)) { + err = PTR_ERR(lag_definer->fg); + goto destroy_ft; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + flow_act.flags |= FLOW_ACT_NO_APPEND; + for (i = 0; i < MLX5_MAX_PORTS; i++) { + u8 affinity = i == 0 ? port1 : port2; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[i])) { + err = PTR_ERR(lag_definer->rules[i]); + while (i--) + mlx5_del_flow_rules(lag_definer->rules[i]); + goto destroy_fg; + } + } + + return 0; + +destroy_fg: + mlx5_destroy_flow_group(lag_definer->fg); +destroy_ft: + mlx5_destroy_flow_table(lag_definer->ft); + return err; +} + static int mlx5_lag_set_definer_inner(u32 *match_definer_mask, enum mlx5_traffic_types tt) { @@ -186,6 +275,120 @@ static int mlx5_lag_set_definer(u32 *match_definer_mask, return format_id; } +static struct mlx5_lag_definer * +mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, + enum mlx5_traffic_types tt, bool tunnel, u8 port1, + u8 port2) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_definer *lag_definer; + u32 *match_definer_mask; + int format_id, err; + + lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); + if (!lag_definer) + return ERR_PTR(ENOMEM); + + match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer, + match_mask), + GFP_KERNEL); + if (!match_definer_mask) { + err = -ENOMEM; + goto free_lag_definer; + } + + format_id = mlx5_lag_set_definer(match_definer_mask, tt, tunnel, hash); + lag_definer->definer = + mlx5_create_match_definer(dev, MLX5_FLOW_NAMESPACE_PORT_SEL, + format_id, match_definer_mask); + if (IS_ERR(lag_definer->definer)) { + err = PTR_ERR(lag_definer->definer); + goto free_mask; + } + + err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2); + if (err) + goto destroy_match_definer; + + kvfree(match_definer_mask); + + return lag_definer; + +destroy_match_definer: + mlx5_destroy_match_definer(dev, lag_definer->definer); +free_mask: + kvfree(match_definer_mask); +free_lag_definer: + kfree(lag_definer); + return ERR_PTR(err); +} + +static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + mlx5_del_flow_rules(lag_definer->rules[i]); + mlx5_destroy_flow_group(lag_definer->fg); + mlx5_destroy_flow_table(lag_definer->ft); + mlx5_destroy_match_definer(dev, lag_definer->definer); + kfree(lag_definer); +} + +static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + if (port_sel->outer.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->outer.definers[tt]); + if (port_sel->inner.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->inner.definers[tt]); + } +} + +static int mlx5_lag_create_definers(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 port1, u8 port2) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_lag_definer *lag_definer; + int tt, err; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, + false, port1, port2); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->outer.definers[tt] = lag_definer; + + if (!port_sel->tunnel) + continue; + + lag_definer = + mlx5_lag_create_definer(ldev, hash_type, tt, + true, port1, port2); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->inner.definers[tt] = lag_definer; + } + + return 0; + +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + static void set_tt_map(struct mlx5_lag_port_sel *port_sel, enum netdev_lag_hash hash) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index c55736d2484d..1b9e2130a0a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -6,9 +6,22 @@ #include "lib/fs_ttc.h" +struct mlx5_lag_definer { + struct mlx5_flow_definer *definer; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS]; +}; + +struct mlx5_lag_ttc { + struct mlx5_lag_definer *definers[MLX5_NUM_TT]; +}; + struct mlx5_lag_port_sel { DECLARE_BITMAP(tt_map, MLX5_NUM_TT); bool tunnel; + struct mlx5_lag_ttc outer; + struct mlx5_lag_ttc inner; }; #endif /* __MLX5_LAG_FS_H__ */ From 8e25a2bc6687cd45aef909d6d862718745b5a3fc Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Jul 2021 09:43:35 +0300 Subject: [PATCH 318/440] net/mlx5: Lag, add support to create TTC tables for LAG port selection Add support to create inner and outer TTC tables for LAG port selection. These tables are used to classify the packets in order to select the related definer. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/lag/port_sel.c | 91 +++++++++++++++++++ .../mellanox/mlx5/core/lag/port_sel.h | 1 + 2 files changed, 92 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 06bc7c7dbb6d..a855b9e86791 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -5,6 +5,8 @@ #include "lag.h" enum { + MLX5_LAG_FT_LEVEL_TTC, + MLX5_LAG_FT_LEVEL_INNER_TTC, MLX5_LAG_FT_LEVEL_DEFINER, }; @@ -420,3 +422,92 @@ static void set_tt_map(struct mlx5_lag_port_sel *port_sel, break; } } + +#define SET_IGNORE_DESTS_BITS(tt_map, dests) \ + do { \ + int idx; \ + \ + for_each_clear_bit(idx, tt_map, MLX5_NUM_TT) \ + set_bit(idx, dests); \ + } while (0) + +static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->inner.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); +} + +static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->outer.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); + + ttc_params->inner_ttc = port_sel->tunnel; + if (!port_sel->tunnel) + return; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(port_sel->inner.ttc); + } +} + +static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); + port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->outer.ttc)) + return PTR_ERR(port_sel->outer.ttc); + + return 0; +} + +static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); + port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->inner.ttc)) + return PTR_ERR(port_sel->inner.ttc); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index 1b9e2130a0a5..045dceec0fab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -14,6 +14,7 @@ struct mlx5_lag_definer { }; struct mlx5_lag_ttc { + struct mlx5_ttc_table *ttc; struct mlx5_lag_definer *definers[MLX5_NUM_TT]; }; From b7267869e9233266d772c418e0a048302510778d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Jul 2021 10:13:52 +0300 Subject: [PATCH 319/440] net/mlx5: Lag, add support to create/destroy/modify port selection Add create function, build the steering tables, TTC and definers according to the LAG hash type. The destroy function, destroys all the steering components. The modify functions is used when the bond mapping changes and it iterates over all the rules in the definers and modifies them to steer the packet to the relevant active ports. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../mellanox/mlx5/core/lag/port_sel.c | 98 +++++++++++++++++++ .../mellanox/mlx5/core/lag/port_sel.h | 24 +++++ 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index fb123e26927d..bdb271b604d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lib/geneve.o lib/port_tun.o \ +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o \ en/mapping.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index a855b9e86791..adc836b3d857 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -511,3 +511,101 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) return 0; } + +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 port1, u8 port2) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + set_tt_map(port_sel, hash_type); + err = mlx5_lag_create_definers(ldev, hash_type, port1, port2); + if (err) + return err; + + if (port_sel->tunnel) { + err = mlx5_lag_create_inner_ttc_table(ldev); + if (err) + goto destroy_definers; + } + + err = mlx5_lag_create_ttc_table(ldev); + if (err) + goto destroy_inner; + + return 0; + +destroy_inner: + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 port1, u8 port2) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_destination dest = {}; + int err; + int tt; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + struct mlx5_flow_handle **rules = definers[tt]->rules; + + if (ldev->v2p_map[MLX5_LAG_P1] != port1) { + dest.vport.vhca_id = + MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id); + err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1], + &dest, NULL); + if (err) + return err; + } + + if (ldev->v2p_map[MLX5_LAG_P2] != port2) { + dest.vport.vhca_id = + MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id); + err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2], + &dest, NULL); + if (err) + return err; + } + } + + return 0; +} + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + err = mlx5_lag_modify_definers_destinations(ldev, + port_sel->outer.definers, + port1, port2); + if (err) + return err; + + if (!port_sel->tunnel) + return 0; + + return mlx5_lag_modify_definers_destinations(ldev, + port_sel->inner.definers, + port1, port2); +} + +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + + mlx5_destroy_ttc_table(port_sel->outer.ttc); + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); + mlx5_lag_destroy_definers(ldev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index 045dceec0fab..6d15b28a42fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -25,4 +25,28 @@ struct mlx5_lag_port_sel { struct mlx5_lag_ttc inner; }; +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2); +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev); +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 port1, + u8 port2); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 port1, u8 port2) +{ + return 0; +} + +static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, + u8 port2) +{ + return 0; +} + +static inline void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_LAG_FS_H__ */ From da6b0bb0fc73d8eae5181f58af80afe04c54fe0a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Aug 2021 00:19:14 +0300 Subject: [PATCH 320/440] net/mlx5: Lag, use steering to select the affinity port in LAG Use the steering based solution for select the affinity port when the LAG mode is based on hash policy and the device support in port selection flow table. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 96 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 4 +- 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 17baa254f9ae..6a754c57e7f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -47,16 +47,21 @@ static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2, bool shared_fdb) + u8 remap_port2, bool shared_fdb, u8 flags) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); + if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) { + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + } else { + MLX5_SET(lagc, lag_ctx, port_select_mode, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT); + } return mlx5_cmd_exec_in(dev, create_lag, in); } @@ -199,6 +204,15 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, *port1 = 2; } +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + + if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) + return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2); + return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); +} + void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { @@ -211,39 +225,56 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { - ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; - ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; - - mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - - err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); - if (err) + err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2); + if (err) { mlx5_core_err(dev0, "Failed to modify LAG (%d)\n", err); + return; + } + ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; + ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; + mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", + ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2]); } } +static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, + struct lag_tracker *tracker, u8 *flags) +{ + bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE); + struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; + + if (roce_lag || + !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) || + tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return; + *flags |= MLX5_LAG_FLAG_HASH_BASED; +} + +static char *get_str_port_sel_mode(u8 flags) +{ + if (flags & MLX5_LAG_FLAG_HASH_BASED) + return "hash"; + return "queue_affinity"; +} + static int mlx5_create_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - bool shared_fdb) + bool shared_fdb, u8 flags) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; - mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], - &ldev->v2p_map[MLX5_LAG_P2]); - - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s", ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], - shared_fdb); + shared_fdb, get_str_port_sel_mode(flags)); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2], shared_fdb); + ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags); if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -279,16 +310,32 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - err = mlx5_create_lag(ldev, tracker, shared_fdb); + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], + &ldev->v2p_map[MLX5_LAG_P2]); + mlx5_lag_set_port_sel_mode(ldev, tracker, &flags); + if (flags & MLX5_LAG_FLAG_HASH_BASED) { + err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, + ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2]); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG port selection(%d)\n", + err); + return err; + } + } + + err = mlx5_create_lag(ldev, tracker, shared_fdb, flags); if (err) { - if (roce_lag) { + if (flags & MLX5_LAG_FLAG_HASH_BASED) + mlx5_lag_port_sel_destroy(ldev); + if (roce_lag) mlx5_core_err(dev0, "Failed to activate RoCE LAG\n"); - } else { + else mlx5_core_err(dev0, "Failed to activate VF LAG\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); - } return err; } @@ -302,6 +349,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); + u8 flags = ldev->flags; int err; ldev->flags &= ~MLX5_LAG_MODE_FLAGS; @@ -324,6 +372,8 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) "Failed to deactivate VF LAG; driver restart required\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); } + } else if (flags & MLX5_LAG_FLAG_HASH_BASED) { + mlx5_lag_port_sel_destroy(ldev); } return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index f0e8b3412c13..e5d231c31b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -18,10 +18,12 @@ enum { MLX5_LAG_FLAG_SRIOV = 1 << 1, MLX5_LAG_FLAG_MULTIPATH = 1 << 2, MLX5_LAG_FLAG_READY = 1 << 3, + MLX5_LAG_FLAG_HASH_BASED = 1 << 4, }; #define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ - MLX5_LAG_FLAG_MULTIPATH) + MLX5_LAG_FLAG_MULTIPATH | \ + MLX5_LAG_FLAG_HASH_BASED) struct lag_func { struct mlx5_core_dev *dev; From 408881627ff05ee47448b930952af68470267c0f Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 20 Sep 2021 10:51:05 +0300 Subject: [PATCH 321/440] net/mlx5: E-Switch, Use dynamic alloc for dest array Use dynamic allocation for the dest array in preparation for the next patch which increase MLX5_MAX_FLOW_FWD_VPORTS and will cause stack allocation to be bigger than 1024 bytes. Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3e5a7d74020b..0ef126fd6a8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -482,12 +482,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_fs_chains *chains = esw_chains(esw); bool split = !!(esw_attr->split_count); struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int i = 0; @@ -495,6 +495,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + flow_act.action = attr->action; /* if per flow vlan pop/push is emulated, don't set that into the firmware */ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) @@ -574,6 +578,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, else atomic64_inc(&esw->offloads.num_flows); + kfree(dest); return rule; err_add_rule: @@ -584,6 +589,7 @@ err_add_rule: err_esw_get: esw_cleanup_dests(esw, attr); err_create_goto_table: + kfree(dest); return rule; } @@ -592,16 +598,20 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_fs_chains *chains = esw_chains(esw); struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; int i, err = 0; + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); @@ -654,6 +664,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, atomic64_inc(&esw->offloads.num_flows); + kfree(dest); return rule; err_chain_src_rewrite: esw_put_dest_tables_loop(esw, attr, 0, i); @@ -661,6 +672,7 @@ err_chain_src_rewrite: err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: + kfree(dest); return rule; } From d40bfeddacd6b4c05a28e3e8f7cb18989a4e134f Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 5 Sep 2021 14:22:11 +0300 Subject: [PATCH 322/440] net/mlx5: E-Switch, Increase supported number of forward destinations to 32 Increase supported number of forward destinations in the same rule, local and remote, from 2 to 32. Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 7461aafb321e..28467f11f04b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -433,7 +433,7 @@ enum mlx5_flow_match_level { }; /* current maximum for flow based vport multicasting */ -#define MLX5_MAX_FLOW_FWD_VPORTS 2 +#define MLX5_MAX_FLOW_FWD_VPORTS 32 enum { MLX5_ESW_DEST_ENCAP = BIT(0), From 6b3efbfa4e68d84e6efbcd1ff4e98f5ba1da4307 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:04 +0300 Subject: [PATCH 323/440] net: sch_tbf: Add a graft command As another qdisc is linked to the TBF, the latter should issue an event to give drivers a chance to react to the grafting. In other qdiscs, this event is called GRAFT, so follow suit with TBF as well. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ net/sched/sch_tbf.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4a5833108083..193f88ebf629 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -977,6 +977,7 @@ enum tc_tbf_command { TC_TBF_REPLACE, TC_TBF_DESTROY, TC_TBF_STATS, + TC_TBF_GRAFT, }; struct tc_tbf_qopt_offload_replace_params { @@ -992,6 +993,7 @@ struct tc_tbf_qopt_offload { union { struct tc_tbf_qopt_offload_replace_params replace_params; struct tc_qopt_offload_stats stats; + u32 child_handle; }; }; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 78e79029dc63..72102277449e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -184,6 +184,20 @@ static int tbf_offload_dump(struct Qdisc *sch) return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); } +static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, + struct Qdisc *old, struct netlink_ext_ack *extack) +{ + struct tc_tbf_qopt_offload graft_offload = { + .handle = sch->handle, + .parent = sch->parent, + .child_handle = new->handle, + .command = TC_TBF_GRAFT, + }; + + qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, + TC_SETUP_QDISC_TBF, &graft_offload, extack); +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -547,6 +561,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); + + tbf_offload_graft(sch, new, *old, extack); return 0; } From 76ff72a7204f3a4e47345872895118149ddd57a8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:05 +0300 Subject: [PATCH 324/440] mlxsw: spectrum_qdisc: Query tclass / priomap instead of caching it Currently when keeping track of qdiscs, mlxsw notes the TC and priomap corresponding to each qdisc. That is fine currently, as there only ever is one level of qdiscs to update: the direct children of ETS / PRIO. However as deeper structures are made offloadable, ETS would need to update these values for the complete subtree, and interim qdiscs would need to remember to propagate the value. Instead, reverse the responsibility: child qdiscs can ask their parent what their TC and priomap are. ETS / PRIO know the answer right away, or there are defaults for when the root qdisc does not assign them (e.g. when RED is used as root qdisc). When RED and TBF become classful, they will simply forward the request up to their parent. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 180 ++++++++++++++---- 1 file changed, 146 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 6d1431fa31d7..5114d65ed33f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -50,12 +50,24 @@ struct mlxsw_sp_qdisc_ops { struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 parent); unsigned int num_classes; + + u8 (*get_prio_bitmap)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child); + int (*get_tclass_num)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child); +}; + +struct mlxsw_sp_qdisc_ets_band { + u8 prio_bitmap; + int tclass_num; +}; + +struct mlxsw_sp_qdisc_ets_data { + struct mlxsw_sp_qdisc_ets_band bands[IEEE_8021QAZ_MAX_TCS]; }; struct mlxsw_sp_qdisc { u32 handle; - int tclass_num; - u8 prio_bitmap; union { struct red_stats red; } xstats_base; @@ -67,6 +79,10 @@ struct mlxsw_sp_qdisc { u64 backlog; } stats_base; + union { + struct mlxsw_sp_qdisc_ets_data *ets_data; + }; + struct mlxsw_sp_qdisc_ops *ops; struct mlxsw_sp_qdisc *parent; struct mlxsw_sp_qdisc *qdiscs; @@ -187,6 +203,28 @@ mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog; } +static u8 mlxsw_sp_qdisc_get_prio_bitmap(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *parent = mlxsw_sp_qdisc->parent; + + if (!parent) + return 0xff; + return parent->ops->get_prio_bitmap(parent, mlxsw_sp_qdisc); +} + +#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 + +static int mlxsw_sp_qdisc_get_tclass_num(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *parent = mlxsw_sp_qdisc->parent; + + if (!parent) + return MLXSW_SP_PORT_DEFAULT_TCLASS; + return parent->ops->get_tclass_num(parent, mlxsw_sp_qdisc); +} + static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -406,13 +444,17 @@ mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *p_tx_bytes, u64 *p_tx_packets, u64 *p_drops, u64 *p_backlog) { - int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; u64 tx_bytes, tx_packets; + u8 prio_bitmap; + int tclass_num; + prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, + mlxsw_sp_qdisc); + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - mlxsw_sp_qdisc_bstats_per_priority_get(xstats, - mlxsw_sp_qdisc->prio_bitmap, + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap, &tx_packets, &tx_bytes); *p_tx_packets += tx_packets; @@ -506,17 +548,21 @@ static void mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; struct red_stats *red_base; + u8 prio_bitmap; + int tclass_num; + prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, + mlxsw_sp_qdisc); + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; red_base = &mlxsw_sp_qdisc->xstats_base.red; - mlxsw_sp_qdisc_bstats_per_priority_get(xstats, - mlxsw_sp_qdisc->prio_bitmap, + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap, &stats_base->tx_packets, &stats_base->tx_bytes); red_base->prob_mark = xstats->tc_ecn[tclass_num]; @@ -533,8 +579,10 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, - mlxsw_sp_qdisc->tclass_num); + int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num); } static int @@ -571,10 +619,13 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct tc_red_qopt_offload_params *p = params; - int tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num; u32 min, max; u64 prob; + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + /* calculate probability in percentage */ prob = p->probability; prob *= 100; @@ -616,11 +667,13 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, void *xstats_ptr) { struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red; - int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; int early_drops, marks, pdrops; + int tclass_num; + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; @@ -643,11 +696,13 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; u64 overlimits; + int tclass_num; + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; @@ -668,8 +723,6 @@ mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, return NULL; } -#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 - static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { .type = MLXSW_SP_QDISC_RED, .check_params = mlxsw_sp_qdisc_red_check_params, @@ -749,9 +802,12 @@ static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, - mlxsw_sp_qdisc->tclass_num, 0, + tclass_num, 0, MLXSW_REG_QEEC_MAS_DIS, 0); } @@ -835,9 +891,13 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct tc_tbf_qopt_offload_replace_params *p = params; u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + int tclass_num; u8 burst_size; int err; + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, + mlxsw_sp_qdisc); + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); if (WARN_ON_ONCE(err)) /* check_params above was supposed to reject this value. */ @@ -853,7 +913,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, */ return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, - mlxsw_sp_qdisc->tclass_num, 0, + tclass_num, 0, rate_kbps, burst_size); } @@ -1040,9 +1100,10 @@ static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_qdisc->qdiscs[i]); - mlxsw_sp_qdisc->qdiscs[i].prio_bitmap = 0; } + kfree(mlxsw_sp_qdisc->ets_data); + mlxsw_sp_qdisc->ets_data = NULL; return 0; } @@ -1079,40 +1140,60 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, const unsigned int *weights, const u8 *priomap) { + struct mlxsw_sp_qdisc_ets_data *ets_data = mlxsw_sp_qdisc->ets_data; struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc_ets_band *ets_band; struct mlxsw_sp_qdisc *child_qdisc; - int tclass, i, band, backlog; - u8 old_priomap; + u8 old_priomap, new_priomap; + int i, band, backlog; int err; + if (!ets_data) { + ets_data = kzalloc(sizeof(*ets_data), GFP_KERNEL); + if (!ets_data) + return -ENOMEM; + mlxsw_sp_qdisc->ets_data = ets_data; + + for (band = 0; band < mlxsw_sp_qdisc->num_classes; band++) { + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + + ets_band = &ets_data->bands[band]; + ets_band->tclass_num = tclass_num; + } + } + for (band = 0; band < nbands; band++) { - tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + int tclass_num; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; - old_priomap = child_qdisc->prio_bitmap; - child_qdisc->prio_bitmap = 0; + ets_band = &ets_data->bands[band]; + + tclass_num = ets_band->tclass_num; + old_priomap = ets_band->prio_bitmap; + new_priomap = 0; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, - tclass, 0, !!quanta[band], + tclass_num, 0, !!quanta[band], weights[band]); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { if (priomap[i] == band) { - child_qdisc->prio_bitmap |= BIT(i); + new_priomap |= BIT(i); if (BIT(i) & old_priomap) continue; err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, - i, tclass); + i, tclass_num); if (err) return err; } } - child_qdisc->tclass_num = tclass; + ets_band->prio_bitmap = new_priomap; - if (old_priomap != child_qdisc->prio_bitmap && + if (old_priomap != new_priomap && child_qdisc->ops && child_qdisc->ops->clean_stats) { backlog = child_qdisc->stats_base.backlog; child_qdisc->ops->clean_stats(mlxsw_sp_port, @@ -1131,13 +1212,15 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, } } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { - tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + ets_band = &ets_data->bands[band]; + ets_band->prio_bitmap = 0; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; - child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, - tclass, 0, false, 0); + ets_band->tclass_num, 0, false, 0); } qdisc_state->future_handle = TC_H_UNSPEC; @@ -1243,6 +1326,31 @@ mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, return &mlxsw_sp_qdisc->qdiscs[band]; } +static struct mlxsw_sp_qdisc_ets_band * +mlxsw_sp_qdisc_ets_get_band(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + unsigned int band = child - mlxsw_sp_qdisc->qdiscs; + + if (WARN_ON(band >= IEEE_8021QAZ_MAX_TCS)) + band = 0; + return &mlxsw_sp_qdisc->ets_data->bands[band]; +} + +static u8 +mlxsw_sp_qdisc_ets_get_prio_bitmap(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->prio_bitmap; +} + +static int +mlxsw_sp_qdisc_ets_get_tclass_num(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc *child) +{ + return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->tclass_num; +} + static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .type = MLXSW_SP_QDISC_PRIO, .check_params = mlxsw_sp_qdisc_prio_check_params, @@ -1253,6 +1361,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, .find_class = mlxsw_sp_qdisc_prio_find_class, .num_classes = IEEE_8021QAZ_MAX_TCS, + .get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap, + .get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num, }; static int @@ -1304,6 +1414,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, .find_class = mlxsw_sp_qdisc_prio_find_class, .num_classes = IEEE_8021QAZ_MAX_TCS, + .get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap, + .get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num, }; /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting @@ -1902,6 +2014,7 @@ mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_cb *block_cb; struct mlxsw_sp_qdisc *qdisc; bool register_block = false; + int tclass_num; int err; block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_qevent_block_cb, mlxsw_sp); @@ -1934,9 +2047,10 @@ mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port, goto err_binding_exists; } + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, qdisc); qevent_binding = mlxsw_sp_qevent_binding_create(mlxsw_sp_port, f->sch->handle, - qdisc->tclass_num, + tclass_num, span_trigger, action_mask); if (IS_ERR(qevent_binding)) { @@ -2048,8 +2162,6 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) return -ENOMEM; mutex_init(&qdisc_state->lock); - qdisc_state->root_qdisc.prio_bitmap = 0xff; - qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; mlxsw_sp_port->qdisc = qdisc_state; return 0; } From 91796f507afccf1c5aa3d7365df0f23c0eedd8c6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:06 +0300 Subject: [PATCH 325/440] mlxsw: spectrum_qdisc: Extract two helpers for handling future FIFOs Extract from __mlxsw_sp_qdisc_ets_replace() two helpers for handling of one future FIFO resp. reinitializing the array of future FIFOs. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 5114d65ed33f..7f29f51bdf1b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1022,6 +1022,32 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, }; +static int +mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, unsigned int band, + struct mlxsw_sp_qdisc *child_qdisc) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + if (handle == qdisc_state->future_handle && + qdisc_state->future_fifos[band]) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, + child_qdisc, + &mlxsw_sp_qdisc_ops_fifo, + NULL); + return 0; +} + +static void +mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + + qdisc_state->future_handle = handle; + memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos)); +} + static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_fifo_qopt_offload *p) { @@ -1037,9 +1063,8 @@ static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, /* This notifications is for a different Qdisc than * previously. Wipe the future cache. */ - memset(qdisc_state->future_fifos, 0, - sizeof(qdisc_state->future_fifos)); - qdisc_state->future_handle = parent_handle; + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, + parent_handle); } band = TC_H_MIN(p->parent) - 1; @@ -1141,7 +1166,6 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, const u8 *priomap) { struct mlxsw_sp_qdisc_ets_data *ets_data = mlxsw_sp_qdisc->ets_data; - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc_ets_band *ets_band; struct mlxsw_sp_qdisc *child_qdisc; u8 old_priomap, new_priomap; @@ -1201,15 +1225,10 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc->stats_base.backlog = backlog; } - if (handle == qdisc_state->future_handle && - qdisc_state->future_fifos[band]) { - err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, - child_qdisc, - &mlxsw_sp_qdisc_ops_fifo, - NULL); - if (err) - return err; - } + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, + band, child_qdisc); + if (err) + return err; } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { ets_band = &ets_data->bands[band]; @@ -1223,8 +1242,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, ets_band->tclass_num, 0, false, 0); } - qdisc_state->future_handle = TC_H_UNSPEC; - memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos)); + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); return 0; } From 65626e075714ac9f512ea97e051e81614a781fed Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:07 +0300 Subject: [PATCH 326/440] mlxsw: spectrum_qdisc: Destroy children in mlxsw_sp_qdisc_destroy() Currently ETS and PRIO are the only offloaded classful qdiscs. Since they are both similar, their destroy handler is the same, and it handles children destruction itself. But now it is possible to do it generically for any classful qdisc. Therefore promote the recursive destruction from the ETS handler to mlxsw_sp_qdisc_destroy(), so that RED and TBF pick it up in follow-up patches. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 7f29f51bdf1b..8ececee1b79b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -232,6 +232,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; int err_hdroom = 0; int err = 0; + int i; if (!mlxsw_sp_qdisc) return 0; @@ -249,6 +250,9 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, if (!mlxsw_sp_qdisc->ops) return 0; + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_qdisc->qdiscs[i]); mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc); if (mlxsw_sp_qdisc->ops->destroy) err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, @@ -1123,8 +1127,6 @@ static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); - mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &mlxsw_sp_qdisc->qdiscs[i]); } kfree(mlxsw_sp_qdisc->ets_data); From be7e2a5a58d4905c8737b64491accb803cf987a9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:08 +0300 Subject: [PATCH 327/440] mlxsw: spectrum_qdisc: Unify graft validation Qdisc graft operations have so far been reported at PRIO, ETS and RED, with RED events ignored, because RED was not considered a classful qdisc. A following patch will make mlxsw recognize RED and TBF as classful qdiscs, and thus it is necessary to validate grafting at these qdiscs as well. Rename the existing graft validator to make it clear that it is a generic function, and invoke for RED and TBF graft events as well. Drop the unnecessary PRIO helper and invoke the graft validator directly for PRIO as well. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 8ececee1b79b..62ec3768c615 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -739,6 +739,10 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { .find_class = mlxsw_sp_qdisc_leaf_find_class, }; +static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle); + static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p) { @@ -766,6 +770,9 @@ static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, case TC_RED_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); + case TC_RED_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0, + p->child_handle); default: return -EOPNOTSUPP; } @@ -976,6 +983,9 @@ static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, case TC_TBF_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); + case TC_TBF_GRAFT: + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0, + p->child_handle); default: return -EOPNOTSUPP; } @@ -1463,10 +1473,9 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { * grafted corresponds to the parent handle. If the two don't match, we * unoffload the child. */ -static int -__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - u8 band, u32 child_handle) +static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { struct mlxsw_sp_qdisc *old_qdisc; u32 parent; @@ -1499,15 +1508,6 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } -static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) -{ - return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, - p->band, p->child_handle); -} - static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -1533,8 +1533,9 @@ static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); case TC_PRIO_GRAFT: - return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc, - &p->graft_params); + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); default: return -EOPNOTSUPP; } @@ -1577,9 +1578,9 @@ static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); case TC_ETS_GRAFT: - return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, - p->graft_params.band, - p->graft_params.child_handle); + return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); default: return -EOPNOTSUPP; } From 01164dda0a64498c9f8fded838e80f645e918a5d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:09 +0300 Subject: [PATCH 328/440] mlxsw: spectrum_qdisc: Clean stats recursively when priomap changes On Spectrum, there are no per-TC TX counters. Instead, mlxsw uses per-prio counters and aggregates them according to the priomap. Therefore when priomap changes, the counter base values need to be reset to reflect the change. Previously, this was only done for the sole child qdisc, but a following patch makes RED and TBF classful. Thus apply the request to the whole sub-tree. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 62ec3768c615..412394e02d2a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1169,6 +1169,31 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, return __mlxsw_sp_qdisc_ets_check_params(p->bands); } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_clean_stats(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *mlxsw_sp_port) +{ + u64 backlog; + + if (mlxsw_sp_qdisc->ops) { + backlog = mlxsw_sp_qdisc->stats_base.backlog; + if (mlxsw_sp_qdisc->ops->clean_stats) + mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, + mlxsw_sp_qdisc); + mlxsw_sp_qdisc->stats_base.backlog = backlog; + } + + return NULL; +} + +static void +mlxsw_sp_qdisc_tree_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + mlxsw_sp_qdisc_walk(mlxsw_sp_qdisc, mlxsw_sp_qdisc_walk_cb_clean_stats, + mlxsw_sp_port); +} + static int __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -1181,7 +1206,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc_ets_band *ets_band; struct mlxsw_sp_qdisc *child_qdisc; u8 old_priomap, new_priomap; - int i, band, backlog; + int i, band; int err; if (!ets_data) { @@ -1229,13 +1254,9 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, ets_band->prio_bitmap = new_priomap; - if (old_priomap != new_priomap && - child_qdisc->ops && child_qdisc->ops->clean_stats) { - backlog = child_qdisc->stats_base.backlog; - child_qdisc->ops->clean_stats(mlxsw_sp_port, - child_qdisc); - child_qdisc->stats_base.backlog = backlog; - } + if (old_priomap != new_priomap) + mlxsw_sp_qdisc_tree_clean_stats(mlxsw_sp_port, + child_qdisc); err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, band, child_qdisc); From c2792f38caae108cd1a082b9fa838d47f051ad50 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:10 +0300 Subject: [PATCH 329/440] mlxsw: spectrum_qdisc: Validate qdisc topology A following patch will enable offloading qdiscs that are deeper than directly under root qdisc. Currently the topology validation consists of demanding a root qdisc position for ETS and PRIO. Since RED and TBF are considered classless, this is enough. In order to prevent some nonsensical combinations when RED and TBF become classful, introduce a more general topology validator. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 91 +++++++++++++++++-- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 412394e02d2a..b865fd3ccf31 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -157,8 +157,7 @@ mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data) } static struct mlxsw_sp_qdisc * -mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, - bool root_only) +mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; @@ -166,8 +165,6 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, return NULL; if (parent == TC_H_ROOT) return &qdisc_state->root_qdisc; - if (root_only) - return NULL; return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, mlxsw_sp_qdisc_walk_cb_find, &parent); } @@ -268,6 +265,78 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, return err_hdroom ?: err; } +struct mlxsw_sp_qdisc_tree_validate { + bool forbid_ets; + bool forbid_tbf; + bool forbid_red; +}; + +static int +__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate); + +static int +mlxsw_sp_qdisc_tree_validate_children(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate) +{ + unsigned int i; + int err; + + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + err = __mlxsw_sp_qdisc_tree_validate(&mlxsw_sp_qdisc->qdiscs[i], + validate); + if (err) + return err; + } + + return 0; +} + +static int +__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_tree_validate validate) +{ + if (!mlxsw_sp_qdisc->ops) + return 0; + + switch (mlxsw_sp_qdisc->ops->type) { + case MLXSW_SP_QDISC_FIFO: + break; + case MLXSW_SP_QDISC_RED: + if (validate.forbid_red) + return -EINVAL; + validate.forbid_red = true; + validate.forbid_ets = true; + break; + case MLXSW_SP_QDISC_TBF: + if (validate.forbid_tbf) + return -EINVAL; + validate.forbid_tbf = true; + validate.forbid_ets = true; + break; + case MLXSW_SP_QDISC_PRIO: + case MLXSW_SP_QDISC_ETS: + if (validate.forbid_ets) + return -EINVAL; + validate.forbid_ets = true; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return mlxsw_sp_qdisc_tree_validate_children(mlxsw_sp_qdisc, validate); +} + +static int mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_qdisc_tree_validate validate = {}; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + return __mlxsw_sp_qdisc_tree_validate(mlxsw_sp_qdisc, validate); +} + static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -310,6 +379,10 @@ static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->num_classes = ops->num_classes; mlxsw_sp_qdisc->ops = ops; mlxsw_sp_qdisc->handle = handle; + err = mlxsw_sp_qdisc_tree_validate(mlxsw_sp_port); + if (err) + goto err_replace; + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) goto err_replace; @@ -748,7 +821,7 @@ static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; @@ -964,7 +1037,7 @@ static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; @@ -1070,7 +1143,7 @@ static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, unsigned int band; u32 parent_handle; - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { parent_handle = TC_H_MAJ(p->parent); if (parent_handle != qdisc_state->future_handle) { @@ -1534,7 +1607,7 @@ static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; @@ -1579,7 +1652,7 @@ static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent); if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; From 2a18c08d75ee04a9159e4bbfb09dbb1e33ef8507 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:11 +0300 Subject: [PATCH 330/440] mlxsw: spectrum_qdisc: Make RED, TBF offloads classful Permit offloading qdiscs below RED and TBF. In order to avoid having to implement trivial propagating callbacks for get_prio_bitmap and get_tclass_num, extend mlxsw_sp_qdisc_get_prio_bitmap() and ..._get_tclass_num() to handle the lack of the callback as a cue to forward the request to the parent. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index b865fd3ccf31..ddb5ad88b350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -207,6 +207,8 @@ static u8 mlxsw_sp_qdisc_get_prio_bitmap(struct mlxsw_sp_port *mlxsw_sp_port, if (!parent) return 0xff; + if (!parent->ops->get_prio_bitmap) + return mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port, parent); return parent->ops->get_prio_bitmap(parent, mlxsw_sp_qdisc); } @@ -219,6 +221,8 @@ static int mlxsw_sp_qdisc_get_tclass_num(struct mlxsw_sp_port *mlxsw_sp_port, if (!parent) return MLXSW_SP_PORT_DEFAULT_TCLASS; + if (!parent->ops->get_tclass_num) + return mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, parent); return parent->ops->get_tclass_num(parent, mlxsw_sp_qdisc); } @@ -689,6 +693,14 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int +mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, unsigned int band, + struct mlxsw_sp_qdisc *child_qdisc); +static void +mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle); + static int mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -699,6 +711,13 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, int tclass_num; u32 min, max; u64 prob; + int err; + + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0, + &mlxsw_sp_qdisc->qdiscs[0]); + if (err) + return err; + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -797,7 +816,10 @@ static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 parent) { - return NULL; + /* RED and TBF are formally classful qdiscs, but all class references, + * including X:0, just refer to the same one class. + */ + return &mlxsw_sp_qdisc->qdiscs[0]; } static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { @@ -810,6 +832,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { .get_xstats = mlxsw_sp_qdisc_get_red_xstats, .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats, .find_class = mlxsw_sp_qdisc_leaf_find_class, + .num_classes = 1, }; static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port, @@ -979,6 +1002,12 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, u8 burst_size; int err; + err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0, + &mlxsw_sp_qdisc->qdiscs[0]); + if (err) + return err; + mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC); + tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -1030,6 +1059,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { .get_stats = mlxsw_sp_qdisc_get_tbf_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, .find_class = mlxsw_sp_qdisc_leaf_find_class, + .num_classes = 1, }; static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, From 29c1eac2e64eda847d6ea51a00c2021198447ed9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 19 Oct 2021 11:07:12 +0300 Subject: [PATCH 331/440] selftests: mlxsw: Add a test for un/offloadable qdisc trees This checks that various qdisc configurations either are or are not offloaded. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/sch_offload.sh | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh new file mode 100755 index 000000000000..ade79ef08de3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh @@ -0,0 +1,276 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test qdisc offload indication + + +ALL_TESTS=" + test_root + test_etsprio +" +NUM_NETIFS=1 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh + +check_not_offloaded() +{ + local handle=$1; shift + local h + local offloaded + + h=$(qdisc_stats_get $h1 "$handle" .handle) + [[ $h == '"'$handle'"' ]] + check_err $? "Qdisc with handle $handle does not exist" + + offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_fail $? "Qdisc with handle $handle offloaded, but should not be" +} + +check_all_offloaded() +{ + local handle=$1; shift + + if [[ ! -z $handle ]]; then + local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_err $? "Qdisc with handle $handle not offloaded" + fi + + local unoffloaded=$(tc q sh dev $h1 invisible | + grep -v offloaded | + sed s/root/parent\ root/ | + cut -d' ' -f 5) + [[ -z $unoffloaded ]] + check_err $? "Qdiscs with following parents not offloaded: $unoffloaded" + + pre_cleanup +} + +with_ets() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + ets bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_prio() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_red() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500 + "$@" + tc qdisc del dev $h1 $locus +} + +with_tbf() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + tbf rate 400Mbit burst 128K limit 1M + "$@" + tc qdisc del dev $h1 $locus +} + +with_pfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_bfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_drr() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle drr + "$@" + tc qdisc del dev $h1 $locus +} + +with_qdiscs() +{ + local handle=$1; shift + local parent=$1; shift + local kind=$1; shift + local next_handle=$((handle * 2)) + local locus; + + if [[ $kind == "--" ]]; then + local cmd=$1; shift + $cmd $(printf %x: $parent) "$@" + else + if ((parent == 0)); then + locus=root + else + locus=$(printf "parent %x:1" $parent) + fi + + with_$kind $(printf %x: $handle) "$locus" \ + with_qdiscs $next_handle $handle "$@" + fi +} + +get_name() +{ + local parent=$1; shift + local name=$(echo "" "${@^^}" | tr ' ' -) + + if ((parent != 0)); then + kind=$(qdisc_stats_get $h1 $parent: .kind) + kind=${kind%\"} + kind=${kind#\"} + name="-${kind^^}$name" + fi + + echo root$name +} + +do_test_offloaded() +{ + local handle=$1; shift + local parent=$1; shift + + RET=0 + with_qdiscs $handle $parent "$@" -- check_all_offloaded + log_test $(get_name $parent "$@")" offloaded" +} + +do_test_nooffload() +{ + local handle=$1; shift + local parent=$1; shift + + local name=$(echo "${@^^}" | tr ' ' -) + local kind + + RET=0 + with_qdiscs $handle $parent "$@" -- check_not_offloaded + log_test $(get_name $parent "$@")" not offloaded" +} + +do_test_combinations() +{ + local handle=$1; shift + local parent=$1; shift + + local cont + local leaf + local fifo + + for cont in "" ets prio; do + for leaf in "" red tbf "red tbf" "tbf red"; do + for fifo in "" pfifo bfifo; do + if [[ -z "$cont$leaf$fifo" ]]; then + continue + fi + do_test_offloaded $handle $parent \ + $cont $leaf $fifo + done + done + done + + for cont in ets prio; do + for leaf in red tbf; do + do_test_nooffload $handle $parent $cont red tbf $leaf + do_test_nooffload $handle $parent $cont tbf red $leaf + done + for leaf in "red red" "tbf tbf"; do + do_test_nooffload $handle $parent $cont $leaf + done + done + + do_test_nooffload $handle $parent drr +} + +test_root() +{ + do_test_combinations 1 0 +} + +do_test_etsprio() +{ + local parent=$1; shift + local tbfpfx=$1; shift + local cont + + for cont in ets prio; do + RET=0 + with_$cont 8: "$parent" \ + with_red 11: "parent 8:1" \ + with_red 12: "parent 8:2" \ + with_tbf 13: "parent 8:3" \ + with_tbf 14: "parent 8:4" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED,TBF} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_tbf 84: "parent 8:4" \ + with_red 841: "parent 84:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_bfifo 8111: "parent 811:1" \ + with_tbf 82: "parent 8:2" \ + with_red 821: "parent 82:1" \ + with_bfifo 8211: "parent 821:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded" + done +} + +test_etsprio() +{ + do_test_etsprio root "" +} + +cleanup() +{ + tc qdisc del dev $h1 root &>/dev/null +} + +trap cleanup EXIT +h1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS From 15c343eb0588adca003879b12a916d54e9f8918d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:21 -0700 Subject: [PATCH 332/440] ethernet: mv643xx: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index a63d9a5c8059..bb14fa2241a3 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2925,10 +2925,14 @@ static void set_params(struct mv643xx_eth_private *mp, struct net_device *dev = mp->dev; unsigned int tx_ring_size; - if (is_valid_ether_addr(pd->mac_addr)) + if (is_valid_ether_addr(pd->mac_addr)) { eth_hw_addr_set(dev, pd->mac_addr); - else - uc_addr_get(mp, dev->dev_addr); + } else { + u8 addr[ETH_ALEN]; + + uc_addr_get(mp, addr); + eth_hw_addr_set(dev, addr); + } mp->rx_ring_size = DEFAULT_RX_QUEUE_SIZE; if (pd->rx_queue_size) From 4789b57af37f75024e02954dba23bebe25b7133a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:22 -0700 Subject: [PATCH 333/440] ethernet: sky2/skge: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 4 +++- drivers/net/ethernet/marvell/sky2.c | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index ac48dcca268c..0c864e5bf0a6 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3810,6 +3810,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, { struct skge_port *skge; struct net_device *dev = alloc_etherdev(sizeof(*skge)); + u8 addr[ETH_ALEN]; if (!dev) return NULL; @@ -3862,7 +3863,8 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, } /* read the mac address */ - memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); + memcpy_fromio(addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); + eth_hw_addr_set(dev, addr); return dev; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 0da18b3f1c01..5abb55191e8e 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4721,9 +4721,12 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, * 2) from internal registers set by bootloader */ ret = of_get_ethdev_address(hw->pdev->dev.of_node, dev); - if (ret) - memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, - ETH_ALEN); + if (ret) { + u8 addr[ETH_ALEN]; + + memcpy_fromio(addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN); + eth_hw_addr_set(dev, addr); + } /* if the address is invalid, use a random value */ if (!is_valid_ether_addr(dev->dev_addr)) { From b814d328692356a1f1cf1e8e1ee6de99bf15f4e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:23 -0700 Subject: [PATCH 334/440] ethernet: lpc: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index fbfbf94e0377..a63cc295b979 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1232,6 +1232,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) struct net_device *ndev; dma_addr_t dma_handle; struct resource *res; + u8 addr[ETH_ALEN]; int irq, ret; /* Setup network interface for RMII or MII mode */ @@ -1347,7 +1348,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) pldat->phy_node = of_parse_phandle(np, "phy-handle", 0); /* Get MAC address from current HW setting (POR state is all zeros) */ - __lpc_get_mac(pldat, ndev->dev_addr); + __lpc_get_mac(pldat, addr); + eth_hw_addr_set(ndev, addr); if (!is_valid_ether_addr(ndev->dev_addr)) { of_get_ethdev_address(np, ndev); From 88e102e8777eccb569382d0cd289e88f0c0c6578 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:24 -0700 Subject: [PATCH 335/440] ethernet: netxen: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Invert the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index b4e094e6f58c..4cfab4434e80 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -463,6 +463,7 @@ netxen_read_mac_addr(struct netxen_adapter *adapter) u64 mac_addr; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; + u8 addr[ETH_ALEN]; if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { if (netxen_p3_get_mac_addr(adapter, &mac_addr) != 0) @@ -474,7 +475,8 @@ netxen_read_mac_addr(struct netxen_adapter *adapter) p = (unsigned char *)&mac_addr; for (i = 0; i < 6; i++) - netdev->dev_addr[i] = *(p + 5 - i); + addr[i] = *(p + 5 - i); + eth_hw_addr_set(netdev, addr); memcpy(adapter->mac_addr, netdev->dev_addr, netdev->addr_len); From 1c5d09d58748b74d8d03a9beadf691f8d9ec8622 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:25 -0700 Subject: [PATCH 336/440] ethernet: r8169: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0199914440ab..ee6c9c842012 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5217,7 +5217,7 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) static void rtl_init_mac_address(struct rtl8169_private *tp) { struct net_device *dev = tp->dev; - u8 *mac_addr = dev->dev_addr; + u8 mac_addr[ETH_ALEN]; int rc; rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr); @@ -5235,6 +5235,7 @@ static void rtl_init_mac_address(struct rtl8169_private *tp) eth_hw_addr_random(dev); dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n"); done: + eth_hw_addr_set(dev, mac_addr); rtl_rar_set(tp, mac_addr); } From 0b08956cd532786b7442338d700e6950e00b4a92 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:26 -0700 Subject: [PATCH 337/440] ethernet: renesas: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Break the address up into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 14 ++++++++------ drivers/net/ethernet/renesas/sh_eth.c | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e5243cc87a19..b4c597f4040c 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -136,13 +136,15 @@ static void ravb_read_mac_address(struct device_node *np, if (ret) { u32 mahr = ravb_read(ndev, MAHR); u32 malr = ravb_read(ndev, MALR); + u8 addr[ETH_ALEN]; - ndev->dev_addr[0] = (mahr >> 24) & 0xFF; - ndev->dev_addr[1] = (mahr >> 16) & 0xFF; - ndev->dev_addr[2] = (mahr >> 8) & 0xFF; - ndev->dev_addr[3] = (mahr >> 0) & 0xFF; - ndev->dev_addr[4] = (malr >> 8) & 0xFF; - ndev->dev_addr[5] = (malr >> 0) & 0xFF; + addr[0] = (mahr >> 24) & 0xFF; + addr[1] = (mahr >> 16) & 0xFF; + addr[2] = (mahr >> 8) & 0xFF; + addr[3] = (mahr >> 0) & 0xFF; + addr[4] = (malr >> 8) & 0xFF; + addr[5] = (malr >> 0) & 0xFF; + eth_hw_addr_set(ndev, addr); } } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 0a7d23df45f2..a3fbb2221c9a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1157,13 +1157,15 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac) } else { u32 mahr = sh_eth_read(ndev, MAHR); u32 malr = sh_eth_read(ndev, MALR); + u8 addr[ETH_ALEN]; - ndev->dev_addr[0] = (mahr >> 24) & 0xFF; - ndev->dev_addr[1] = (mahr >> 16) & 0xFF; - ndev->dev_addr[2] = (mahr >> 8) & 0xFF; - ndev->dev_addr[3] = (mahr >> 0) & 0xFF; - ndev->dev_addr[4] = (malr >> 8) & 0xFF; - ndev->dev_addr[5] = (malr >> 0) & 0xFF; + addr[0] = (mahr >> 24) & 0xFF; + addr[1] = (mahr >> 16) & 0xFF; + addr[2] = (mahr >> 8) & 0xFF; + addr[3] = (mahr >> 0) & 0xFF; + addr[4] = (malr >> 8) & 0xFF; + addr[5] = (malr >> 0) & 0xFF; + eth_hw_addr_set(ndev, addr); } } From 298b0e0c5fec65081e308a4aa2f16ec8a91070e1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:27 -0700 Subject: [PATCH 338/440] ethernet: rocker: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f28c0c36b606..ba4062881eed 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2545,11 +2545,13 @@ static void rocker_port_dev_addr_init(struct rocker_port *rocker_port) { const struct rocker *rocker = rocker_port->rocker; const struct pci_dev *pdev = rocker->pdev; + u8 addr[ETH_ALEN]; int err; - err = rocker_cmd_get_port_settings_macaddr(rocker_port, - rocker_port->dev->dev_addr); - if (err) { + err = rocker_cmd_get_port_settings_macaddr(rocker_port, addr); + if (!err) { + eth_hw_addr_set(rocker_port->dev, addr); + } else { dev_warn(&pdev->dev, "failed to get mac address, using random\n"); eth_hw_addr_random(rocker_port->dev); } From 15fa05bf41abc3703b0c48d8adc9d57d8411f726 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:28 -0700 Subject: [PATCH 339/440] ethernet: sxgbe: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 6781aa636d58..32161a56726c 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -931,10 +931,13 @@ static int sxgbe_get_hw_features(struct sxgbe_priv_data * const priv) static void sxgbe_check_ether_addr(struct sxgbe_priv_data *priv) { if (!is_valid_ether_addr(priv->dev->dev_addr)) { + u8 addr[ETH_ALEN]; + priv->hw->mac->get_umac_addr((void __iomem *) - priv->ioaddr, - priv->dev->dev_addr, 0); - if (!is_valid_ether_addr(priv->dev->dev_addr)) + priv->ioaddr, addr, 0); + if (is_valid_ether_addr(addr)) + eth_hw_addr_set(priv->dev, addr); + else eth_hw_addr_random(priv->dev); } dev_info(priv->device, "device MAC address %pM\n", From f60e8b06e0cc00c03c775a6b6f186782c5c8df8a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:29 -0700 Subject: [PATCH 340/440] ethernet: sis190: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis190.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 5e66e3f3aafc..216bb2d34d7c 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1631,6 +1631,7 @@ static int sis190_get_mac_addr_from_apc(struct pci_dev *pdev, static const u16 ids[] = { 0x0965, 0x0966, 0x0968 }; struct sis190_private *tp = netdev_priv(dev); struct pci_dev *isa_bridge; + u8 addr[ETH_ALEN]; u8 reg, tmp8; unsigned int i; @@ -1659,8 +1660,9 @@ static int sis190_get_mac_addr_from_apc(struct pci_dev *pdev, for (i = 0; i < ETH_ALEN; i++) { outb(0x9 + i, 0x78); - dev->dev_addr[i] = inb(0x79); + addr[i] = inb(0x79); } + eth_hw_addr_set(dev, addr); outb(0x12, 0x78); reg = inb(0x79); From 74fad215ee3d27b6e6aae1acc0c2c4dade7a68b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:30 -0700 Subject: [PATCH 341/440] ethernet: sis900: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 3f5717a1874f..cc2d907c4c4b 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -292,6 +292,7 @@ static int sis630e_get_mac_addr(struct pci_dev *pci_dev, struct net_device *net_dev) { struct pci_dev *isa_bridge = NULL; + u8 addr[ETH_ALEN]; u8 reg; int i; @@ -308,8 +309,9 @@ static int sis630e_get_mac_addr(struct pci_dev *pci_dev, for (i = 0; i < 6; i++) { outb(0x09 + i, 0x70); - ((u8 *)(net_dev->dev_addr))[i] = inb(0x71); + addr[i] = inb(0x71); } + eth_hw_addr_set(net_dev, addr); pci_write_config_byte(isa_bridge, 0x48, reg & ~0x40); pci_dev_put(isa_bridge); From 02bfb6beb695dbf5fb8119f4700516be09da70aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:31 -0700 Subject: [PATCH 342/440] ethernet: smc91x: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 4 +++- drivers/net/ethernet/smsc/smc91x.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index b008b4e8a2a5..89381f796985 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1788,6 +1788,7 @@ static int smc911x_probe(struct net_device *dev) struct dma_slave_config config; dma_cap_mask_t mask; #endif + u8 addr[ETH_ALEN]; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); @@ -1892,7 +1893,8 @@ static int smc911x_probe(struct net_device *dev) spin_lock_init(&lp->lock); /* Get the MAC address */ - SMC_GET_MAC_ADDR(lp, dev->dev_addr); + SMC_GET_MAC_ADDR(lp, addr); + eth_hw_addr_set(dev, addr); /* now, reset the chip, and put it into a known state */ smc911x_reset(dev); diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 813ea941b91a..a31c159e96ea 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1851,6 +1851,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, int retval; unsigned int val, revision_register; const char *version_string; + u8 addr[ETH_ALEN]; DBG(2, dev, "%s: %s\n", CARDNAME, __func__); @@ -1922,7 +1923,8 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, /* Get the MAC address */ SMC_SELECT_BANK(lp, 1); - SMC_GET_MAC_ADDR(lp, dev->dev_addr); + SMC_GET_MAC_ADDR(lp, addr); + eth_hw_addr_set(dev, addr); /* now, reset the chip, and put it into a known state */ smc_reset(dev); From f15fef4c0675d4117d71ae5233e864fd3db14761 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 07:29:32 -0700 Subject: [PATCH 343/440] ethernet: smsc: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Break the address up into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 14 ++++++++------ drivers/net/ethernet/smsc/smsc9420.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 73bcc6f2bb6e..7a50ba00f8ae 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2162,13 +2162,15 @@ static void smsc911x_read_mac_address(struct net_device *dev) struct smsc911x_data *pdata = netdev_priv(dev); u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH); u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL); + u8 addr[ETH_ALEN]; - dev->dev_addr[0] = (u8)(mac_low32); - dev->dev_addr[1] = (u8)(mac_low32 >> 8); - dev->dev_addr[2] = (u8)(mac_low32 >> 16); - dev->dev_addr[3] = (u8)(mac_low32 >> 24); - dev->dev_addr[4] = (u8)(mac_high16); - dev->dev_addr[5] = (u8)(mac_high16 >> 8); + addr[0] = (u8)(mac_low32); + addr[1] = (u8)(mac_low32 >> 8); + addr[2] = (u8)(mac_low32 >> 16); + addr[3] = (u8)(mac_low32 >> 24); + addr[4] = (u8)(mac_high16); + addr[5] = (u8)(mac_high16 >> 8); + eth_hw_addr_set(dev, addr); } /* Initializing private device structures, only called from probe */ diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index d207c0b463ab..d937af18973e 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -416,6 +416,7 @@ static void smsc9420_set_mac_address(struct net_device *dev) static void smsc9420_check_mac_address(struct net_device *dev) { struct smsc9420_pdata *pd = netdev_priv(dev); + u8 addr[ETH_ALEN]; /* Check if mac address has been specified when bringing interface up */ if (is_valid_ether_addr(dev->dev_addr)) { @@ -427,15 +428,16 @@ static void smsc9420_check_mac_address(struct net_device *dev) * it will already have been set */ u32 mac_high16 = smsc9420_reg_read(pd, ADDRH); u32 mac_low32 = smsc9420_reg_read(pd, ADDRL); - dev->dev_addr[0] = (u8)(mac_low32); - dev->dev_addr[1] = (u8)(mac_low32 >> 8); - dev->dev_addr[2] = (u8)(mac_low32 >> 16); - dev->dev_addr[3] = (u8)(mac_low32 >> 24); - dev->dev_addr[4] = (u8)(mac_high16); - dev->dev_addr[5] = (u8)(mac_high16 >> 8); + addr[0] = (u8)(mac_low32); + addr[1] = (u8)(mac_low32 >> 8); + addr[2] = (u8)(mac_low32 >> 16); + addr[3] = (u8)(mac_low32 >> 24); + addr[4] = (u8)(mac_high16); + addr[5] = (u8)(mac_high16 >> 8); - if (is_valid_ether_addr(dev->dev_addr)) { + if (is_valid_ether_addr(addr)) { /* eeprom values are valid so use them */ + eth_hw_addr_set(dev, addr); netif_dbg(pd, probe, pd->dev, "Mac Address is read from EEPROM\n"); } else { From e80094a473eefad9d856ce3ab0d7afdbb64800c4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:02 -0700 Subject: [PATCH 344/440] ethernet: add a helper for assigning port addresses We have 5 drivers which offset base MAC addr by port id. Create a helper for them. This helper takes care of overflows, which some drivers did not do, please complain if that's going to break anything! Signed-off-by: Jakub Kicinski Reviewed-by: Vladimir Oltean Reviewed-by: Shannon Nelson Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 23681c3d3b8a..2ad71cc90b37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -551,6 +551,27 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) #endif } +/** + * eth_hw_addr_gen - Generate and assign Ethernet address to a port + * @dev: pointer to port's net_device structure + * @base_addr: base Ethernet address + * @id: offset to add to the base address + * + * Generate a MAC address using a base address and an offset and assign it + * to a net_device. Commonly used by switch drivers which need to compute + * addresses for all their ports. addr_assign_type is not changed. + */ +static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, + unsigned int id) +{ + u64 u = ether_addr_to_u64(base_addr); + u8 addr[ETH_ALEN]; + + u += id; + u64_to_ether_addr(u, addr); + eth_hw_addr_set(dev, addr); +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad From 53fdcce6ab932dbd8f1df75873b6f303dc85751f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:03 -0700 Subject: [PATCH 345/440] ethernet: ocelot: use eth_hw_addr_gen() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 9992bf06311d..affa9649f490 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1705,8 +1705,7 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, NETIF_F_HW_TC; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; - eth_hw_addr_set(dev, ocelot->base_mac); - dev->dev_addr[ETH_ALEN - 1] += port; + eth_hw_addr_gen(dev, ocelot->base_mac, port); ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); From 8eb8192ea2915a783d65a29138f6fceee4d81cb2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:04 -0700 Subject: [PATCH 346/440] ethernet: prestera: use eth_hw_addr_gen() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Vadym and Taras report that the current behavior of the driver is not exactly expected and it's better to add the port id in like other drivers do. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index b667f560b931..d0d5a229d19d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -338,11 +338,14 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) goto err_port_init; } + eth_hw_addr_gen(dev, sw->base_mac, port->fp_id); /* firmware requires that port's MAC address consist of the first * 5 bytes of the base MAC address */ - memcpy(dev->dev_addr, sw->base_mac, dev->addr_len - 1); - dev->dev_addr[dev->addr_len - 1] = port->fp_id; + if (memcmp(dev->dev_addr, sw->base_mac, ETH_ALEN - 1)) { + dev_warn(prestera_dev(sw), "Port MAC address wraps for port(%u)\n", id); + dev_addr_mod(dev, 0, sw->base_mac, ETH_ALEN - 1); + } err = prestera_hw_port_mac_set(port, dev->dev_addr); if (err) { From ba3fdfe32bb94d712188fbdd37d4e64edbf8e65d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:05 -0700 Subject: [PATCH 347/440] ethernet: fec: use eth_hw_addr_gen() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 47a6fc702ac7..bc418b910999 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1768,11 +1768,8 @@ static int fec_get_mac(struct net_device *ndev) return 0; } - eth_hw_addr_set(ndev, iap); - /* Adjust MAC if using macaddr */ - if (iap == macaddr) - ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id; + eth_hw_addr_gen(ndev, iap, iap == macaddr ? fep->dev_id : 0); return 0; } From be7550549e26d937ef055f6a90f370e7408b42ef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:06 -0700 Subject: [PATCH 348/440] ethernet: mlxsw: use eth_hw_addr_gen() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/minimal.c | 10 +++------- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index e0892f259adf..5d4dfa5ddbb5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -200,20 +200,16 @@ static int mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port) { struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; - struct net_device *dev = mlxsw_m_port->dev; char ppad_pl[MLXSW_REG_PPAD_LEN]; + u8 addr[ETH_ALEN]; int err; mlxsw_reg_ppad_pack(ppad_pl, false, 0); err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(ppad), ppad_pl); if (err) return err; - mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr); - /* The last byte value in base mac address is guaranteed - * to be such it does not overflow when adding local_port - * value. - */ - dev->dev_addr[ETH_ALEN - 1] += mlxsw_m_port->module + 1; + mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, addr); + eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d05850ff3a77..66c346a86ec5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -316,11 +316,11 @@ static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - unsigned char *addr = mlxsw_sp_port->dev->dev_addr; - ether_addr_copy(addr, mlxsw_sp->base_mac); - addr[ETH_ALEN - 1] += mlxsw_sp_port->local_port; - return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); + eth_hw_addr_gen(mlxsw_sp_port->dev, mlxsw_sp->base_mac, + mlxsw_sp_port->local_port); + return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, + mlxsw_sp_port->dev->dev_addr); } static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu) From 07a7ec9bdafee05d7f161a4a2159e94d4b6b3253 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 14:10:07 -0700 Subject: [PATCH 349/440] ethernet: sparx5: use eth_hw_addr_gen() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index b21ebaa32d7e..e042f117dc7a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -200,7 +200,6 @@ struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) { struct sparx5_port *spx5_port; struct net_device *ndev; - u64 val; ndev = devm_alloc_etherdev(sparx5->dev, sizeof(struct sparx5_port)); if (!ndev) @@ -216,8 +215,7 @@ struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) ndev->netdev_ops = &sparx5_port_netdev_ops; ndev->ethtool_ops = &sparx5_ethtool_ops; - val = ether_addr_to_u64(sparx5->base_mac) + portno + 1; - u64_to_ether_addr(val, ndev->dev_addr); + eth_hw_addr_gen(ndev, sparx5->base_mac, portno + 1); return ndev; } From 1bd297988b75c0768e34e983b2a61a6d3dcda999 Mon Sep 17 00:00:00 2001 From: luo penghao Date: Mon, 18 Oct 2021 08:51:54 +0000 Subject: [PATCH 350/440] e1000e: Remove redundant statement This assignment statement is meaningless, because the statement will execute to the tag "set_itr_now". The clang_analyzer complains as follows: drivers/net/ethernet/intel/e1000e/netdev.c:2552:3 warning: Value stored to 'current_itr' is never read. Reported-by: Zeal Robot Signed-off-by: luo penghao Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ff8672ae2b6c..21ec7169b6a1 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2549,7 +2549,6 @@ static void e1000_set_itr(struct e1000_adapter *adapter) /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ if (adapter->link_speed != SPEED_1000) { - current_itr = 0; new_itr = 4000; goto set_itr_now; } From dc90604b5836998bb2911faf80dc204f35db08a0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 19 Oct 2021 11:00:04 +0100 Subject: [PATCH 351/440] net: phylink: rejig SFP interface selection in ksettings_set() Commit ea269a6f7207 ("net: phylink: Update SFP selected interface on advertising changes") added a better solution to selecting the interface mode for SFPs using the advertisement mask. This method will work for mvneta and mvpp2 when selecting between 2500base-X and 1000base-X without needing to use the basex helper, or indicate that we support both 1000base-X and 2500base-X when in either of these two interface modes. Hence, we need to eliminate the validation prior to selecting the interface, otherwise when we clean up mvneta's validation function, we will end up locking to 2500base-X as we validate with an interface mode of PHY_INERFACE_MODE_2500BASEX. The supported mask will already have been reduced down to the union of support for the SFP and MAC already, so we can be confident that the advertisement mask is already appropriately restricted. We only need to select the appropriate interface, and then revalidate with the new interface mode. We get rid of the check for pl->sfp_port too, this is meaningless here as it doesn't get cleared when a module is removed, so it doesn't indicate if a module is present. Just rely on pl->sfp_bus. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 16240f2dd161..9ac65f435ff1 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1612,20 +1612,11 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising, config.an_enabled); - /* Validate without changing the current supported mask. */ - linkmode_copy(support, pl->supported); - if (phylink_validate(pl, support, &config)) - return -EINVAL; - - /* If autonegotiation is enabled, we must have an advertisement */ - if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) - return -EINVAL; - /* If this link is with an SFP, ensure that changes to advertised modes * also cause the associated interface to be selected such that the * link can be configured correctly. */ - if (pl->sfp_port && pl->sfp_bus) { + if (pl->sfp_bus) { config.interface = sfp_select_interface(pl->sfp_bus, config.advertising); if (config.interface == PHY_INTERFACE_MODE_NA) { @@ -1645,8 +1636,17 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __ETHTOOL_LINK_MODE_MASK_NBITS, support); return -EINVAL; } + } else { + /* Validate without changing the current supported mask. */ + linkmode_copy(support, pl->supported); + if (phylink_validate(pl, support, &config)) + return -EINVAL; } + /* If autonegotiation is enabled, we must have an advertisement */ + if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) + return -EINVAL; + mutex_lock(&pl->state_mutex); pl->link_config.speed = config.speed; pl->link_config.duplex = config.duplex; From e22db7bd552f7f7f19fe4ef60abfb7e7b364e3a8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Oct 2021 12:12:04 +0200 Subject: [PATCH 352/440] net: sched: Allow statistics reads from softirq. Eric reported that the rate estimator reads statics from the softirq which in turn triggers a warning introduced in the statistics rework. The warning is too cautious. The updates happen in the softirq context so reads from softirq are fine since the writes can not be preempted. The updates/writes happen during qdisc_run() which ensures one writer and the softirq context. The remaining bad context for reading statistics remains in hard-IRQ because it may preempt a writer. Fixes: 29cbcd8582837 ("net: sched: Remove Qdisc::running sequence counter") Reported-by: Eric Dumazet Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/core/gen_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 5516ea0d5da0..15c270e22c5e 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -154,7 +154,7 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, u64 bytes = 0; u64 packets = 0; - WARN_ON_ONCE((cpu || running) && !in_task()); + WARN_ON_ONCE((cpu || running) && in_hardirq()); if (cpu) { gnet_stats_add_basic_cpu(bstats, cpu); From 92817dad7dcb781561dcebbf1b19a177774d84c2 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 19 Oct 2021 11:24:50 +0100 Subject: [PATCH 353/440] net: phylink: Support disabling autonegotiation for PCS The auto-negotiation state in the PCS as set by phylink_mii_c22_pcs_config was previously always enabled when the driver is configured for in-band autonegotiation, even if autonegotiation was disabled on the interface with ethtool. Update the code to set the BMCR_ANENABLE bit based on the interface's autonegotiation enabled state. Update phylink_mii_c22_pcs_get_state to not check autonegotiation-related fields when autonegotiation is disabled. Update phylink_mac_pcs_get_state to initialize the state based on the interface's configured speed, duplex and pause parameters rather than to unknown when autonegotiation is disabled, before calling the driver's pcs_get_state functions, as they are not likely to provide meaningful data for these fields when autonegotiation is disabled. In this case the driver is really just filling in the link state field. Note that in cases where there is a downstream PHY connected, such as with SGMII and a copper PHY, the configuration set by ethtool is handled by phy_ethtool_ksettings_set and not propagated to the PCS. This is correct since SGMII or 1000Base-X autonegotiation with the PCS should normally still be used even if the copper side has disabled it. Signed-off-by: Robert Hancock Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9ac65f435ff1..7e93d81fa5ad 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -551,9 +551,15 @@ static void phylink_mac_pcs_get_state(struct phylink *pl, linkmode_zero(state->lp_advertising); state->interface = pl->link_config.interface; state->an_enabled = pl->link_config.an_enabled; - state->speed = SPEED_UNKNOWN; - state->duplex = DUPLEX_UNKNOWN; - state->pause = MLO_PAUSE_NONE; + if (state->an_enabled) { + state->speed = SPEED_UNKNOWN; + state->duplex = DUPLEX_UNKNOWN; + state->pause = MLO_PAUSE_NONE; + } else { + state->speed = pl->link_config.speed; + state->duplex = pl->link_config.duplex; + state->pause = pl->link_config.pause; + } state->an_complete = 0; state->link = 1; @@ -2549,7 +2555,10 @@ void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, state->link = !!(bmsr & BMSR_LSTATUS); state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE); - if (!state->link) + /* If there is no link or autonegotiation is disabled, the LP advertisement + * data is not meaningful, so don't go any further. + */ + if (!state->link || !state->an_enabled) return; switch (state->interface) { @@ -2651,7 +2660,12 @@ int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, changed = ret > 0; /* Ensure ISOLATE bit is disabled */ - bmcr = mode == MLO_AN_INBAND ? BMCR_ANENABLE : 0; + if (mode == MLO_AN_INBAND && + linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) + bmcr = BMCR_ANENABLE; + else + bmcr = 0; + ret = mdiobus_modify(pcs->bus, pcs->addr, MII_BMCR, BMCR_ANENABLE | BMCR_ISOLATE, bmcr); if (ret < 0) From 3c71e0c9ab4f428f4daeb451f6f7008a979f154a Mon Sep 17 00:00:00 2001 From: luo penghao Date: Mon, 18 Oct 2021 08:55:13 +0000 Subject: [PATCH 354/440] ethernet: Remove redundant statement The variable will be assigned again later in the if condition, there is no meaning there. drivers/net/ethernet/broadcom/tg3.c:5750:2 warning: Value stored to 'current_link_up' is never read. Reported-by: Zeal Robot Signed-off-by: luo penghao Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index e9518b98914b..b1328c5524b5 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -5747,7 +5747,6 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset) tw32_f(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED); udelay(40); - current_link_up = false; tp->link_config.rmt_adv = 0; mac_status = tr32(MAC_STATUS); From cb3dc8901ba4f748f46287a42049a3a5c1f6b63c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 19 Oct 2021 10:49:54 +0300 Subject: [PATCH 355/440] devlink: Remove extra device_lock assert checks PCI core code in the pci_call_probe() has a path that doesn't hold device_lock. It happens because the ->probe() is called through the workqueue mechanism. 349 static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, 350 const struct pci_device_id *id) 351 { 352 .... 377 if (cpu < nr_cpu_ids) 378 error = work_on_cpu(cpu, local_pci_probe, &ddi); Luckily enough, the core still ensures that only single flow is executed, so it safe to remove the assert checks that anyway were added for annotations purposes. Fixes: b88f7b1203bf ("devlink: Annotate devlink API calls") Reported-by: Amit Cohen Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- net/core/devlink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 3ce6147a2fe8..3464854015a2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9147,7 +9147,6 @@ void devlink_register(struct devlink *devlink) { ASSERT_DEVLINK_NOT_REGISTERED(devlink); /* Make sure that we are in .probe() routine */ - device_lock_assert(devlink->dev); mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); @@ -9165,7 +9164,6 @@ void devlink_unregister(struct devlink *devlink) { ASSERT_DEVLINK_REGISTERED(devlink); /* Make sure that we are in .remove() routine */ - device_lock_assert(devlink->dev); devlink_put(devlink); wait_for_completion(&devlink->comp); From 07fab5a469a5e01f8ca48c8459003d16c5c26f35 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Oct 2021 20:26:14 +0200 Subject: [PATCH 356/440] ieee802154: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Christophe JAILLET Signed-off-by: Stefan Schmidt Signed-off-by: David S. Miller --- drivers/net/ieee802154/ca8210.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 3a2824f24caa..ece6ff6049f6 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2938,9 +2938,7 @@ static int ca8210_dev_com_init(struct ca8210_priv *priv) */ static void ca8210_dev_com_clear(struct ca8210_priv *priv) { - flush_workqueue(priv->mlme_workqueue); destroy_workqueue(priv->mlme_workqueue); - flush_workqueue(priv->irq_workqueue); destroy_workqueue(priv->irq_workqueue); } From 05be94633783ffb3ad5b0aca7f6cff08cad6868d Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Mon, 18 Oct 2021 21:16:29 +0800 Subject: [PATCH 357/440] net: ethernet: ixp4xx: Make use of dma_pool_zalloc() instead of dma_pool_alloc/memset() Replacing dma_pool_alloc/memset() with dma_pool_zalloc() to simplify the code. Signed-off-by: Cai Huoqing Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 3e5fd952aeaa..65fdad1107fc 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1103,10 +1103,9 @@ static int init_queues(struct port *port) return -ENOMEM; } - if (!(port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL, - &port->desc_tab_phys))) + port->desc_tab = dma_pool_zalloc(dma_pool, GFP_KERNEL, &port->desc_tab_phys); + if (!port->desc_tab) return -ENOMEM; - memset(port->desc_tab, 0, POOL_ALLOC_SIZE); memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */ memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab)); From 4ecc8633056b8d2f214c6ce860a50837e5c8fb41 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 13 Sep 2021 11:22:19 -0700 Subject: [PATCH 358/440] ice: Add support for VF rate limiting Implement ndo_set_vf_rate to support setting of min_tx_rate and max_tx_rate; set the appropriate bandwidth in the scheduler for the node representing the specified VF VSI. Co-developed-by: Tarun Singh Signed-off-by: Tarun Singh Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 174 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_lib.h | 4 +- drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_sched.c | 130 +++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 6 + .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 160 +++++++++++++++- .../net/ethernet/intel/ice/ice_virtchnl_pf.h | 15 +- 7 files changed, 486 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f981e77f72ad..acff8d3b1c97 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3600,6 +3600,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) return 0; } +/** + * ice_get_link_speed_mbps - get link speed in Mbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +int ice_get_link_speed_mbps(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + return SPEED_100000; + case ICE_AQ_LINK_SPEED_50GB: + return SPEED_50000; + case ICE_AQ_LINK_SPEED_40GB: + return SPEED_40000; + case ICE_AQ_LINK_SPEED_25GB: + return SPEED_25000; + case ICE_AQ_LINK_SPEED_20GB: + return SPEED_20000; + case ICE_AQ_LINK_SPEED_10GB: + return SPEED_10000; + case ICE_AQ_LINK_SPEED_5GB: + return SPEED_5000; + case ICE_AQ_LINK_SPEED_2500MB: + return SPEED_2500; + case ICE_AQ_LINK_SPEED_1000MB: + return SPEED_1000; + case ICE_AQ_LINK_SPEED_100MB: + return SPEED_100; + case ICE_AQ_LINK_SPEED_10MB: + return SPEED_10; + case ICE_AQ_LINK_SPEED_UNKNOWN: + default: + return 0; + } +} + +/** + * ice_get_link_speed_kbps - get link speed in Kbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +static int ice_get_link_speed_kbps(struct ice_vsi *vsi) +{ + int speed_mbps; + + speed_mbps = ice_get_link_speed_mbps(vsi); + + return speed_mbps * 1000; +} + +/** + * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate + * @vsi: VSI to be configured + * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit + * + * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit + * profile, otherwise a non-zero value will force a minimum BW limit for the VSI + * on TC 0. + */ +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (min_tx_rate > (u64)speed) { + dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure min BW for VSI limit */ + if (min_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MIN_BW, min_tx_rate); + if (status) { + dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n", + min_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n", + min_tx_rate, ice_vsi_type_str(vsi->type)); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MIN_BW); + if (status) { + dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + +/** + * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit + * + * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit + * profile, otherwise a non-zero value will force a maximum BW limit for the VSI + * on TC 0. + */ +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (max_tx_rate > (u64)speed) { + dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure max BW for VSI limit */ + if (max_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MAX_BW, max_tx_rate); + if (status) { + dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MAX_BW); + if (status) { + dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + /** * ice_set_link - turn on/off physical link * @vsi: VSI to modify physical link on diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b6c429c5875d..d395bd590e84 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -116,7 +116,9 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_clear_dflt_vsi(struct ice_sw *sw); - +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_mbps(struct ice_vsi *vsi); int ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f531691a3e12..99647dceefc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7390,6 +7390,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, .ndo_get_vf_stats = ice_get_vf_stats, + .ndo_set_vf_rate = ice_set_vf_bw, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_setup_tc = ice_setup_tc, diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 9f07b6641705..560e52b99f83 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -3770,6 +3770,136 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, ICE_SCHED_DFLT_BW); } +/** + * ice_sched_get_node_by_id_type - get node from ID type + * @pi: port information structure + * @id: identifier + * @agg_type: type of aggregator + * @tc: traffic class + * + * This function returns node identified by ID of type aggregator, and + * based on traffic class (TC). This function needs to be called with + * the scheduler lock held. + */ +static struct ice_sched_node * +ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc) +{ + struct ice_sched_node *node = NULL; + + switch (agg_type) { + case ICE_AGG_TYPE_VSI: { + struct ice_vsi_ctx *vsi_ctx; + u16 vsi_handle = (u16)id; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + break; + /* Get sched_vsi_info */ + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + break; + node = vsi_ctx->sched.vsi_node[tc]; + break; + } + + case ICE_AGG_TYPE_AGG: { + struct ice_sched_node *tc_node; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (tc_node) + node = ice_sched_get_agg_node(pi, tc_node, id); + break; + } + + default: + break; + } + + return node; +} + +/** + * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC + * @pi: port information structure + * @id: ID (software VSI handle or AGG ID) + * @agg_type: aggregator type (VSI or AGG type node) + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function sets BW limit of VSI or Aggregator scheduling node + * based on TC information from passed in argument BW. + */ +static enum ice_status +ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + + if (!pi) + return status; + + if (rl_type == ICE_UNKNOWN_BW) + return status; + + mutex_lock(&pi->sched_lock); + node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n"); + goto exit_set_node_bw_lmt_per_tc; + } + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + +exit_set_node_bw_lmt_per_tc: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function configures BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); +} + +/** + * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * + * This function configures default BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); +} + /** * ice_cfg_rl_burst_size - Set burst size value * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 9beef8f0ec76..f89b80ba3499 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -103,6 +103,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_status ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type); +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type); enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw); void ice_sched_replay_agg(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index d90a3b7be713..86f265268ac8 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" +#include "ice_dcb_lib.h" #include "ice_flow.h" #include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" @@ -884,6 +885,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf; } +/** + * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration + * @vf: VF to re-apply the configuration for + * + * Called after a VF VSI has been re-added/rebuild during reset. The PF driver + * needs to re-apply the host configured Tx rate limiting configuration. + */ +static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + int err; + + if (vf->min_tx_rate) { + err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", + vf->min_tx_rate, vf->vf_id, err); + return err; + } + } + + if (vf->max_tx_rate) { + err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", + vf->max_tx_rate, vf->vf_id, err); + return err; + } + } + + return 0; +} + /** * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN * @vf: VF to add MAC filters for @@ -1420,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) if (ice_vf_rebuild_host_vlan_cfg(vf)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); + + if (ice_vf_rebuild_host_tx_rate_cfg(vf)) + dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", + vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ ice_vf_rebuild_aggregator_node_cfg(vsi); } @@ -4747,8 +4787,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; - ivi->max_tx_rate = vf->tx_rate; - ivi->min_tx_rate = 0; + ivi->max_tx_rate = vf->max_tx_rate; + ivi->min_tx_rate = vf->min_tx_rate; return 0; } @@ -4926,6 +4966,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return 0; } +/** + * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs + * @pf: PF associated with VFs + */ +static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) +{ + int rate = 0, i; + + ice_for_each_vf(pf, i) + rate += pf->vf[i].min_tx_rate; + + return rate; +} + +/** + * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription + * @vf: VF trying to configure min_tx_rate + * @min_tx_rate: min Tx rate in Mbps + * + * Check if the min_tx_rate being passed in will cause oversubscription of total + * min_tx_rate based on the current link speed and all other VFs configured + * min_tx_rate + * + * Return true if the passed min_tx_rate would cause oversubscription, else + * return false + */ +static bool +ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) +{ + int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); + int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); + + /* this VF's previous rate is being overwritten */ + all_vfs_min_tx_rate -= vf->min_tx_rate; + + if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { + dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", + min_tx_rate, vf->vf_id, + all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, + link_speed_mbps); + return true; + } + + return false; +} + +/** + * ice_set_vf_bw - set min/max VF bandwidth + * @netdev: network interface device structure + * @vf_id: VF identifier + * @min_tx_rate: Minimum Tx rate in Mbps + * @max_tx_rate: Maximum Tx rate in Mbps + */ +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + + vsi = ice_get_vf_vsi(vf); + + /* when max_tx_rate is zero that means no max Tx rate limiting, so only + * check if max_tx_rate is non-zero + */ + if (max_tx_rate && min_tx_rate > max_tx_rate) { + dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", + min_tx_rate, max_tx_rate); + return -EINVAL; + } + + if (min_tx_rate && ice_is_dcb_active(pf)) { + dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); + return -EOPNOTSUPP; + } + + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) + return -EINVAL; + + if (vf->min_tx_rate != (unsigned int)min_tx_rate) { + ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set min-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->min_tx_rate = min_tx_rate; + } + + if (vf->max_tx_rate != (unsigned int)max_tx_rate) { + ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set max-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->max_tx_rate = max_tx_rate; + } + + return 0; +} + /** * ice_get_vf_stats - populate some stats for the VF * @netdev: the netdev of the PF diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3115284e5411..5ff93a08f54c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -125,7 +125,8 @@ struct ice_vf { * the main LAN VSI for the PF. */ u16 lan_vsi_num; /* ID as used by firmware */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ + unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ u64 num_inval_msgs; /* number of continuous invalid msgs */ @@ -172,6 +173,10 @@ int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto); +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); + int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); @@ -303,6 +308,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } +static inline int +ice_set_vf_bw(struct net_device __always_unused *netdev, + int __always_unused vf_id, int __always_unused min_tx_rate, + int __always_unused max_tx_rate) +{ + return -EOPNOTSUPP; +} + static inline int ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, struct ice_q_vector __always_unused *q_vector) From d8eb7ad5e46c65d57fa72571cf2ccca7da1e7196 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:12 -0700 Subject: [PATCH 359/440] ice: update dim usage and moderation The driver was having trouble with unreliable latency when doing single threaded ping-pong tests. This was root caused to the DIM algorithm landing on a too slow interrupt value, which caused high latency, and it was especially present when queues were being switched frequently by the scheduler as happens on default setups today. In attempting to improve this, we allow the upper rate limit for interrupts to move to rate limit of 4 microseconds as a max, which means that no vector can generate more than 250,000 interrupts per second. The old config was up to 100,000. The driver previously tried to program the rate limit too frequently and if the receive and transmit side were both active on the same vector, the INTRL would be set incorrectly, and this change fixes that issue as a side effect of the redesign. This driver will operate from now on with a slightly changed DIM table with more emphasis towards latency sensitivity by having more table entries with lower latency than with high latency (high being >= 64 microseconds). The driver also resets the DIM algorithm state with a new stats set when there is no work done and the data becomes stale (older than 1 second), for the respective receive or transmit portion of the interrupt. Add a new helper for setting rate limit, which will be used more in a followup patch. Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 25 +++++ drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 119 ++++++++++++---------- drivers/net/ethernet/intel/ice/ice_txrx.c | 84 ++++++++------- 4 files changed, 142 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index acff8d3b1c97..fd894e89be3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1941,6 +1941,31 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) __ice_write_itr(q_vector, rc, itr); } +/** + * ice_set_q_vector_intrl - set up interrupt rate limiting + * @q_vector: the vector to be configured + * + * Interrupt rate limiting is local to the vector, not per-queue so we must + * detect if either ring container has dynamic moderation enabled to decide + * what to set the interrupt rate limit to via INTRL settings. In the case that + * dynamic moderation is disabled on both, write the value with the cached + * setting to make sure INTRL register matches the user visible value. + */ +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector) +{ + if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) { + /* in the case of dynamic enabled, cap each vector to no more + * than (4 us) 250,000 ints/sec, which allows low latency + * but still less than 500,000 interrupts per second, which + * reduces CPU a bit in the case of the lowest latency + * setting. The 4 here is a value in microseconds. + */ + ice_write_intrl(q_vector, 4); + } else { + ice_write_intrl(q_vector, q_vector->intrl); + } +} + /** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index d395bd590e84..c79fcbf82d8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -103,6 +103,7 @@ int ice_status_to_errno(enum ice_status err); void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 99647dceefc4..846623a97723 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5502,77 +5502,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi) } /* THEORY OF MODERATION: - * The below code creates custom DIM profiles for use by this driver, because - * the ice driver hardware works differently than the hardware that DIMLIB was + * The ice driver hardware works differently than the hardware that DIMLIB was * originally made for. ice hardware doesn't have packet count limits that * can trigger an interrupt, but it *does* have interrupt rate limit support, - * and this code adds that capability to be used by the driver when it's using - * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver - * for how to "respond" to traffic and interrupts, so this driver uses a - * slightly different set of moderation parameters to get best performance. + * which is hard-coded to a limit of 250,000 ints/second. + * If not using dynamic moderation, the INTRL value can be modified + * by ethtool rx-usecs-high. */ struct ice_dim { /* the throttle rate for interrupts, basically worst case delay before * an initial interrupt fires, value is stored in microseconds. */ u16 itr; - /* the rate limit for interrupts, which can cap a delay from a small - * ITR at a certain amount of interrupts per second. f.e. a 2us ITR - * could yield as much as 500,000 interrupts per second, but with a - * 10us rate limit, it limits to 100,000 interrupts per second. Value - * is stored in microseconds. - */ - u16 intrl; }; /* Make a different profile for Rx that doesn't allow quite so aggressive - * moderation at the high end (it maxes out at 128us or about 8k interrupts a - * second. The INTRL/rate parameters here are only useful to cap small ITR - * values, which is why for larger ITR's - like 128, which can only generate - * 8k interrupts per second, there is no point to rate limit and the values - * are set to zero. The rate limit values do affect latency, and so must - * be reasonably small so to not impact latency sensitive tests. + * moderation at the high end (it maxes out at 126us or about 8k interrupts a + * second. */ static const struct ice_dim rx_profile[] = { - {2, 10}, - {8, 16}, - {32, 0}, - {96, 0}, - {128, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {16}, /* 62,500 ints/s */ + {62}, /* 16,129 ints/s */ + {126} /* 7,936 ints/s */ }; /* The transmit profile, which has the same sorts of values * as the previous struct */ static const struct ice_dim tx_profile[] = { - {2, 10}, - {8, 16}, - {64, 0}, - {128, 0}, - {256, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {40}, /* 16,125 ints/s */ + {128}, /* 7,812 ints/s */ + {256} /* 3,906 ints/s */ }; static void ice_tx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, tx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) - dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); /* look up the values in our local table */ itr = tx_profile[dim->profile_ix].itr; - intrl = tx_profile[dim->profile_ix].intrl; - ice_trace(tx_dim_work, q_vector, dim); + ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } @@ -5580,28 +5562,65 @@ static void ice_tx_dim_work(struct work_struct *work) static void ice_rx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, rx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) - dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); /* look up the values in our local table */ itr = rx_profile[dim->profile_ix].itr; - intrl = rx_profile[dim->profile_ix].intrl; - ice_trace(rx_dim_work, q_vector, dim); + ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } +#define ICE_DIM_DEFAULT_PROFILE_IX 1 + +/** + * ice_init_moderation - set up interrupt moderation + * @q_vector: the vector containing rings to be configured + * + * Set up interrupt moderation registers, with the intent to do the right thing + * when called from reset or from probe, and whether or not dynamic moderation + * is enabled or not. Take special care to write all the registers in both + * dynamic moderation mode or not in order to make sure hardware is in a known + * state. + */ +static void ice_init_moderation(struct ice_q_vector *q_vector) +{ + struct ice_ring_container *rc; + bool tx_dynamic, rx_dynamic; + + rc = &q_vector->tx; + INIT_WORK(&rc->dim.work, ice_tx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + tx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial TX ITR to match the above */ + ice_write_itr(rc, tx_dynamic ? + tx_profile[rc->dim.profile_ix].itr : rc->itr_setting); + + rc = &q_vector->rx; + INIT_WORK(&rc->dim.work, ice_rx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + rx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial RX ITR to match the above */ + ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr : + rc->itr_setting); + + ice_set_q_vector_intrl(q_vector); +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5616,11 +5635,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); - q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - - INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); - q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + ice_init_moderation(q_vector); if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_enable(&q_vector->napi); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 01ae331927bd..1373b97b117a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1259,6 +1259,41 @@ construct_skb: return failure ? budget : (int)total_rx_pkts; } +static void __ice_update_sample(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + struct dim_sample *sample, + bool is_tx) +{ + u64 packets = 0, bytes = 0; + + if (is_tx) { + struct ice_tx_ring *tx_ring; + + ice_for_each_tx_ring(tx_ring, *rc) { + packets += tx_ring->stats.pkts; + bytes += tx_ring->stats.bytes; + } + } else { + struct ice_rx_ring *rx_ring; + + ice_for_each_rx_ring(rx_ring, *rc) { + packets += rx_ring->stats.pkts; + bytes += rx_ring->stats.bytes; + } + } + + dim_update_sample(q_vector->total_events, packets, bytes, sample); + sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 + * second or longer, force it to start again. This addresses the + * frequent case of an idle queue being switched to by the + * scheduler. The 1,000 here means 1,000 milliseconds. + */ + if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000) + rc->dim.state = DIM_START_MEASURE; +} + /** * ice_net_dim - Update net DIM algorithm * @q_vector: the vector associated with the interrupt @@ -1274,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct ice_ring_container *rx = &q_vector->rx; if (ITR_IS_DYNAMIC(tx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, tx, &dim_sample, true); net_dim(&tx->dim, dim_sample); } if (ITR_IS_DYNAMIC(rx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_rx_ring *ring; - - ice_for_each_rx_ring(ring, q_vector->rx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, rx, &dim_sample, false); net_dim(&rx->dim, dim_sample); } } @@ -1328,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) } /** - * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * ice_enable_interrupt - re-enable MSI-X interrupt * @q_vector: the vector associated with the interrupt to enable * - * Update the net_dim() algorithm and re-enable the interrupt associated with - * this vector. - * - * If the VSI is down, the interrupt will not be re-enabled. + * If the VSI is down, the interrupt will not be re-enabled. Also, + * when enabling the interrupt always reset the wb_on_itr to false + * and trigger a software interrupt to clean out internal state. */ -static void ice_update_ena_itr(struct ice_q_vector *q_vector) +static void ice_enable_interrupt(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; bool wb_en = q_vector->wb_on_itr; @@ -1351,10 +1367,6 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) if (wb_en) q_vector->wb_on_itr = false; - /* This will do nothing if dynamic updates are not enabled. */ - ice_net_dim(q_vector); - - /* net_dim() updates ITR out-of-band using a work item */ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); /* trigger an immediate software interrupt when exiting * busy poll, to make sure to catch any pending cleanups @@ -1482,10 +1494,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) - ice_update_ena_itr(q_vector); - else + if (likely(napi_complete_done(napi, work_done))) { + ice_net_dim(q_vector); + ice_enable_interrupt(q_vector); + } else { ice_set_wb_on_itr(q_vector); + } return min_t(int, work_done, budget - 1); } From d16a4f45f3a3afcb56910a7242cc621c071e80e4 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:13 -0700 Subject: [PATCH 360/440] ice: fix rate limit update after coalesce change If the adaptive settings are changed with ethtool -C ethx adaptive-rx off adaptive-tx off then the interrupt rate limit should be maintained as a user set value, but only if BOTH adaptive settings are off. Fix a bug where the rate limit that was being used in adaptive mode was staying set in the register but was not reported correctly by ethtool -c ethx. Due to long lines include a small refactor of q_vector variable. Fixes: b8b4772377dd ("ice: refactor interrupt moderation writes") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +++++++++++------ drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 6e0af72c2020..f4b3c5b73c7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3640,6 +3640,9 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, switch (rc->type) { case ICE_RX_CONTAINER: + { + struct ice_q_vector *q_vector = rc->rx_ring->q_vector; + if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { @@ -3648,22 +3651,20 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl && + if (ec->rx_coalesce_usecs_high != q_vector->intrl && (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", c_type_str); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) { - rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - ice_write_intrl(rc->rx_ring->q_vector, - ec->rx_coalesce_usecs_high); - } + if (ec->rx_coalesce_usecs_high != q_vector->intrl) + q_vector->intrl = ec->rx_coalesce_usecs_high; use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; coalesce_usecs = ec->rx_coalesce_usecs; break; + } case ICE_TX_CONTAINER: use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; coalesce_usecs = ec->tx_coalesce_usecs; @@ -3808,6 +3809,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; + + ice_set_q_vector_intrl(vsi->q_vectors[v_idx]); } goto set_complete; } @@ -3815,6 +3818,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, q_num)) return -EINVAL; + ice_set_q_vector_intrl(vsi->q_vectors[q_num]); + set_complete: return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fd894e89be3b..231f8bea2519 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3121,7 +3121,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } vsi->q_vectors[i]->intrl = coalesce[i].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } /* the number of queue vectors increased so write whatever is in @@ -3139,7 +3139,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } } From 23be7075b31863da9cf3983aebcc8ff7537c06ac Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:14 -0700 Subject: [PATCH 361/440] ice: fix software generating extra interrupts The driver tried to work around missing completion events that occurred while interrupts are disabled, by triggering a software interrupt whenever we exit polling (but we had to have polled at least once). This was causing a *lot* of extra interrupts for some workloads like NVMe over TCP, which resulted in regressions in performance. It was also visible when polling didn't prevent interrupts when busy_poll was enabled. Fix the extra interrupts by utilizing our previously unused 3rd ITR (interrupt throttle) index and set it to 20K interrupts per second, and then trigger a software interrupt within that rate limit. While here, slightly refactor the code to avoid an overwrite of a local variable in the case of wb_en = true. Fixes: b7306b42beaf ("ice: manage interrupts during poll exit") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_hw_autogen.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 26 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 76021d977b60..a49082485642 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -182,6 +182,7 @@ #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) #define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 1373b97b117a..8f908af9bdd5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1361,21 +1361,25 @@ static void ice_enable_interrupt(struct ice_q_vector *q_vector) if (test_bit(ICE_DOWN, vsi->state)) return; - /* When exiting WB_ON_ITR, let ITR resume its normal - * interrupts-enabled path. + /* trigger an ITR delayed software interrupt when exiting busy poll, to + * make sure to catch any pending cleanups that might have been missed + * due to interrupt state transition. If busy poll or poll isn't + * enabled, then don't update ITR, and just enable the interrupt. */ - if (wb_en) + if (!wb_en) { + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + } else { q_vector->wb_on_itr = false; - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - /* trigger an immediate software interrupt when exiting - * busy poll, to make sure to catch any pending cleanups - * that might have been missed due to interrupt state - * transition. - */ - if (wb_en) { + /* do two things here with a single write. Set up the third ITR + * index to be used for software interrupt moderation, and then + * trigger a software interrupt with a rate limit of 20K on + * software interrupts, this will help avoid high interrupt + * loads due to frequently polling and exiting polling. + */ + itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K); itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_SW_ITR_INDX_M | + ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S | GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); From 1281b7459657abcd6db74fbfbf9b20717de8966c Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 15 Oct 2021 10:27:19 +0200 Subject: [PATCH 362/440] ice: Forbid trusted VFs in switchdev mode Merge issues caused the check for switchdev mode has been inserted in wrong place. It should be in ice_set_vf_trust not in ice_set_vf_mac. Trusted VFs are forbidden in switchdev mode because they should be configured only from the host side. Fixes: 1c54c839935b ("ice: enable/disable switchdev when managing VFs") Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 86f265268ac8..862d191284ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -4839,11 +4839,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct ice_vf *vf; int ret; - if (ice_is_eswitch_mode_switchdev(pf)) { - dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); - return -EOPNOTSUPP; - } - if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; @@ -4903,6 +4898,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + if (ice_is_eswitch_mode_switchdev(pf)) { + dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); + return -EOPNOTSUPP; + } + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; From 73b483b7902918cd7de3481313ed4943eb9c3f3b Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 23 Sep 2021 14:43:48 +0200 Subject: [PATCH 363/440] ice: Manage act flags for switchdev offloads Currently it is not possible to set/unset lb_en and lan_en flags for advanced rules during their creation. Both flags are enabled by default. In case of switchdev offloads for egress traffic we need lb_en to be disabled. Because of that, we work around it by updating the rule immediately after its creation. This change allows us to set/unset those flags right away and it gets rid of old workaround as well. Using ice_adv_rule_flags_info structure we can pass info about flags we want to be set for a given advanced rule. Flags are stored in flags_info.act. Values from act would be used only if act_valid was set to true, otherwise default values would be used. Signed-off-by: Wojciech Drewek Acked-by: Paul Menzel Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fltr.c | 127 -------------------- drivers/net/ethernet/intel/ice/ice_fltr.h | 4 - drivers/net/ethernet/intel/ice/ice_switch.c | 9 +- drivers/net/ethernet/intel/ice/ice_switch.h | 11 ++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 8 +- 5 files changed, 21 insertions(+), 138 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index f8c59df4ff9c..c2e78eaf4ccb 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -453,133 +453,6 @@ static u32 ice_fltr_build_action(u16 vsi_id) ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; } -/** - * ice_fltr_find_adv_entry - find advanced rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_adv_fltr_mgmt_list_entry * -ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_adv_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->rule_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_adv_rule_flags - update flags on advanced rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: advanced rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id, - struct ice_adv_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_adv_rule_info *info = &entry->rule_info; - struct ice_sw_act_ctrl *act = &info->sw_act; - u32 action; - - if (act->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(act->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->sw_act.flag, - act->src, new_flags); -} - -/** - * ice_fltr_find_regular_entry - find regular rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_fltr_mgmt_list_entry * -ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->fltr_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_regular_rule - update flags on regular rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: regular rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id, - struct ice_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_fltr_info *info = &entry->fltr_info; - u32 action; - - if (info->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(info->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->flag, - info->src, new_flags); -} - -/** - * ice_fltr_update_flags - update flags on rule - * @vsi: pointer to VSI - * @rule_id: id of rule - * @recipe_id: id of recipe - * @new_flags: flags to update - * - * Function updates flags on regular and advance rule. - * - * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and - * ICE_SINGLE_ACT_LAN_ENABLE. - */ -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags) -{ - struct ice_adv_fltr_mgmt_list_entry *adv_entry; - struct ice_fltr_mgmt_list_entry *regular_entry; - struct ice_hw *hw = &vsi->back->hw; - struct ice_sw_recipe *recp_list; - struct list_head *fltr_rules; - - recp_list = &hw->switch_info->recp_list[recipe_id]; - if (!recp_list) - return ICE_ERR_DOES_NOT_EXIST; - - fltr_rules = &recp_list->filt_rules; - regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id); - if (regular_entry) - return ice_fltr_update_regular_rule(vsi, recipe_id, - regular_entry, new_flags); - - adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id); - if (adv_entry) - return ice_fltr_update_adv_rule_flags(vsi, recipe_id, - adv_entry, new_flags); - - return ICE_ERR_DOES_NOT_EXIST; -} - /** * ice_fltr_update_flags_dflt_rule - update flags on default rule * @vsi: pointer to VSI diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h index 949e38ce016c..8eec4febead1 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.h +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -36,10 +36,6 @@ enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, enum ice_sw_fwd_act_type action); void ice_fltr_remove_all(struct ice_vsi *vsi); - -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags); enum ice_status ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction, u32 new_flags); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 0d07547b40f1..a4a299012f9f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4783,7 +4783,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return ICE_ERR_NO_MEMORY; - act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE; + if (!rinfo->flags_info.act_valid) { + act |= ICE_SINGLE_ACT_LAN_ENABLE; + act |= ICE_SINGLE_ACT_LB_ENABLE; + } else { + act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_LB_ENABLE); + } + switch (rinfo->sw_act.fltr_act) { case ICE_FWD_TO_VSI: act |= (rinfo->sw_act.fwd_id.hw_vsi_id << diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 34b7f74b1ab8..d4c0a3b594af 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -160,11 +160,22 @@ struct ice_rule_query_data { u16 vsi_handle; }; +/* This structure allows to pass info about lb_en and lan_en + * flags to ice_add_adv_rule. Values in act would be used + * only if act_valid was set to true, otherwise default + * values would be used. + */ +struct ice_adv_rule_flags_info { + u32 act; + u8 act_valid; /* indicate if flags in act are valid */ +}; + struct ice_adv_rule_info { struct ice_sw_act_ctrl sw_act; u32 priority; u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ u16 fltr_rule_id; + struct ice_adv_rule_flags_info flags_info; }; /* A collection of one or more four word recipe */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 4c1daa1a02a1..1dccfd116bc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -274,6 +274,8 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.rx = false; + rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + rule_info.flags_info.act_valid = true; } /* specify the cookie as filter_rule_id */ @@ -296,12 +298,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) fltr->rid = rule_added.rid; fltr->rule_id = rule_added.rule_id; - if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { - if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid, - ICE_SINGLE_ACT_LAN_ENABLE)) - ice_rem_adv_rule_by_id(hw, &rule_added); - } - exit: kfree(list); return ret; From 3f13f570ff2c413dbdf088058a3e019db376f49d Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 8 Oct 2021 10:44:03 +0200 Subject: [PATCH 364/440] ice: Refactor PR ethtool ops This patch improves a few things: - it fixes issue where ethtool -i reports that PR supports priv-flags and tests when in fact it does not support them - instead of using the same functions for both PF and PR ethtool ops, this patch introduces separate ops for both cases and internal functions with core logic. - prevent accessing VF VSI while VF is not ready by calling ice_check_vf_ready_for_cfg - all PR specific functions in ethtool.c were moved to one place in file - instead overwriting n_priv_flags in ice_repr_get_drvinfo, priv-flags code was moved from __ice_get_drvinfo to ice_get_drvinfo Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 99 +++++++++++++++----- 1 file changed, 74 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f4b3c5b73c7d..8b3eef6632e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -192,7 +192,6 @@ __ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo, strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); - drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static void @@ -201,18 +200,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) struct ice_netdev_priv *np = netdev_priv(netdev); __ice_get_drvinfo(netdev, drvinfo, np->vsi); -} -static void -ice_repr_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct ice_repr *repr = ice_netdev_to_repr(netdev); - - if (ice_check_vf_ready_for_cfg(repr->vf)) - return; - - __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); + drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static int ice_get_regs_len(struct net_device __always_unused *netdev) @@ -886,10 +875,10 @@ skip_ol_tests: netdev_info(netdev, "testing finished\n"); } -static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +static void +__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); unsigned int i; u8 *p = data; @@ -940,6 +929,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_strings(netdev, stringset, data, np->vsi); +} + static int ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { @@ -1331,9 +1327,6 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * order of strings will suffer from race conditions and are * not safe. */ - if (ice_is_port_repr_netdev(netdev)) - return ICE_VSI_STATS_LEN; - return ICE_ALL_STATS_LEN(netdev); case ETH_SS_TEST: return ICE_TEST_LEN; @@ -1345,11 +1338,10 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) } static void -ice_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats __always_unused *stats, u64 *data) +__ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); struct ice_pf *pf = vsi->back; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; @@ -1416,6 +1408,15 @@ ice_get_ethtool_stats(struct net_device *netdev, } } +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_ethtool_stats(netdev, stats, data, np->vsi); +} + #define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) @@ -3839,6 +3840,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +static void +ice_repr_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); +} + +static void +ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + /* for port representors only ETH_SS_STATS is supported */ + if (ice_check_vf_ready_for_cfg(repr->vf) || + stringset != ETH_SS_STATS) + return; + + __ice_get_strings(netdev, stringset, data, repr->src_vsi); +} + +static void +ice_repr_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi); +} + +static int ice_repr_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ICE_VSI_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + #define ICE_I2C_EEPROM_DEV_ADDR 0xA0 #define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 #define ICE_MODULE_TYPE_SFP 0x03 @@ -4093,9 +4142,9 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev) static const struct ethtool_ops ice_ethtool_repr_ops = { .get_drvinfo = ice_repr_get_drvinfo, .get_link = ethtool_op_get_link, - .get_strings = ice_get_strings, - .get_ethtool_stats = ice_get_ethtool_stats, - .get_sset_count = ice_get_sset_count, + .get_strings = ice_repr_get_strings, + .get_ethtool_stats = ice_repr_get_ethtool_stats, + .get_sset_count = ice_repr_get_sset_count, }; /** From 7c1b694adab1a4a24477c393843730a90f595124 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 20:59:46 +0800 Subject: [PATCH 365/440] ice: Make use of the helper function devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. Signed-off-by: Cai Huoqing Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 55353bf4cbef..e3d9f3f3a631 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -452,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev) return NULL; /* Add an action to teardown the devlink when unwinding the driver */ - if (devm_add_action(dev, ice_devlink_free, devlink)) { - devlink_free(devlink); + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) return NULL; - } return devlink_priv(devlink); } From 6f3323536aa8895b1e3fc6fe6762e7ddee916de1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 6 Oct 2021 13:09:08 -0500 Subject: [PATCH 366/440] ice: use devm_kcalloc() instead of devm_kzalloc() Use 2-factor multiplication argument form devm_kcalloc() instead of devm_kzalloc(). Link: https://github.com/KSPP/linux/issues/162 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 16de603b280c..38960bcc384c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow) if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); @@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8f908af9bdd5..bc3ba19dc88f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -343,7 +343,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(tx_ring->tx_buf); tx_ring->tx_buf = - devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count, + devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count, GFP_KERNEL); if (!tx_ring->tx_buf) return -ENOMEM; @@ -475,7 +475,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count, + devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; From 8702ed0b0de146c980883896b4cbb40ce860142e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:00:12 +0300 Subject: [PATCH 367/440] ice: fix an error code in ice_ena_vfs() Return the error code if ice_eswitch_configure() fails. Don't return success. Fixes: 1c54c839935b ("ice: enable/disable switchdev when managing VFs") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 862d191284ba..8e3f9ec4e35b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2015,7 +2015,8 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) clear_bit(ICE_VF_DIS, pf->state); - if (ice_eswitch_configure(pf)) + ret = ice_eswitch_configure(pf); + if (ret) goto err_unroll_sriov; return 0; From 4c57e2fac41cefa49583b9836677e5b59cbe9f64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Oct 2021 17:34:01 -0700 Subject: [PATCH 368/440] net: sched: fix logic error in qdisc_run_begin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non TCQ_F_NOLOCK qdisc, qdisc_run_begin() tries to set __QDISC_STATE_RUNNING and should return true if the bit was not set. test_and_set_bit() returns old bit value, therefore we need to invert. Fixes: 29cbcd858283 ("net: sched: Remove Qdisc::running sequence counter") Signed-off-by: Eric Dumazet Cc: Ahmed S. Darwish Tested-by: Ido Schimmel Acked-by: Sebastian Andrzej Siewior Tested-by: Toke Høiland-Jørgensen Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index baad2ab4d971..e0988c56dd8f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,7 +217,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ return spin_trylock(&qdisc->seqlock); } - return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); + return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); } static inline void qdisc_run_end(struct Qdisc *qdisc) From 97604c65bcda4b0e953cb86dae5335632e199f94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Oct 2021 17:34:02 -0700 Subject: [PATCH 369/440] net: sched: remove one pair of atomic operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __QDISC_STATE_RUNNING is only set/cleared from contexts owning qdisc lock. Thus we can use less expensive bit operations, as we were doing before commit f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Fixes: 29cbcd858283 ("net: sched: Remove Qdisc::running sequence counter") Signed-off-by: Eric Dumazet Cc: Ahmed S. Darwish Acked-by: Sebastian Andrzej Siewior Tested-by: Toke Høiland-Jørgensen Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e0988c56dd8f..ada02c4a4f51 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -38,10 +38,13 @@ enum qdisc_state_t { __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, __QDISC_STATE_DRAINING, +}; + +enum qdisc_state2_t { /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. * Use qdisc_run_begin/end() or qdisc_is_running() instead. */ - __QDISC_STATE_RUNNING, + __QDISC_STATE2_RUNNING, }; #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) @@ -114,6 +117,7 @@ struct Qdisc { struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; unsigned long state; + unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; @@ -154,7 +158,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return test_bit(__QDISC_STATE_RUNNING, &qdisc->state); + return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -217,7 +221,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ return spin_trylock(&qdisc->seqlock); } - return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); + return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -229,7 +233,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) &qdisc->state))) __netif_schedule(qdisc); } else { - clear_bit(__QDISC_STATE_RUNNING, &qdisc->state); + __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } } From 7a414b6e1a1c799fa74451db3a84c686b727bd7e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 18 Oct 2021 22:29:49 +0300 Subject: [PATCH 370/440] dt-bindings: net: dsa: sja1105: fix example so all ports have a phy-handle of fixed-link All ports require either a phy-handle or a fixed-link, and port 3 in the example didn't have one. Add it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index f978f8719d8e..f97a22772e6f 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -113,6 +113,7 @@ examples: }; port@3 { + phy-handle = <&rgmii_phy4>; phy-mode = "rgmii-id"; reg = <3>; }; From e00eb643324c7a57f4f3fcb2bc5e882f4b88a3a1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 18 Oct 2021 22:29:50 +0300 Subject: [PATCH 371/440] dt-bindings: net: dsa: inherit the ethernet-controller DT schema Since a switch is basically a bunch of Ethernet controllers, just inherit the common schema for one to get stronger type validation of the properties of a port. For example, before this change it was valid to have a phy-mode = "xfi" even if "xfi" is not part of ethernet-controller.yaml, now it is not. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 224cfa45de9a..9cfd08cd31da 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -46,6 +46,9 @@ patternProperties: type: object description: Ethernet switch ports + allOf: + - $ref: "http://devicetree.org/schemas/net/ethernet-controller.yaml#" + properties: reg: description: Port number From ac41ac81e331319a1773d5a6a6744e8ce7389f08 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 18 Oct 2021 22:29:51 +0300 Subject: [PATCH 372/440] dt-bindings: net: dsa: sja1105: add {rx,tx}-internal-delay-ps Add a schema validator to nxp,sja1105.yaml and to dsa.yaml for explicit MAC-level RGMII delays. These properties must be per port and must be present only for a phy-mode that represents RGMII. We tell dsa.yaml that these port properties might be present, we also define their valid values for SJA1105. We create a common definition for the RX and TX valid range, since it's quite a mouthful. We also modify the example to include the explicit RGMII delay properties. On the fixed-link ports (in the example, port 4), having these explicit delays is actually mandatory, since with the new behavior, the driver shouts that it is interpreting what delays to apply based on phy-mode. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/dsa.yaml | 4 ++ .../bindings/net/dsa/nxp,sja1105.yaml | 42 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 9cfd08cd31da..2ad7f79ad371 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -97,6 +97,10 @@ patternProperties: managed: true + rx-internal-delay-ps: true + + tx-internal-delay-ps: true + required: - reg diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index f97a22772e6f..24cd733c11d1 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -74,10 +74,42 @@ properties: - compatible - reg +patternProperties: + "^(ethernet-)?ports$": + patternProperties: + "^(ethernet-)?port@[0-9]+$": + allOf: + - if: + properties: + phy-mode: + contains: + enum: + - rgmii + - rgmii-rxid + - rgmii-txid + - rgmii-id + then: + properties: + rx-internal-delay-ps: + $ref: "#/$defs/internal-delay-ps" + tx-internal-delay-ps: + $ref: "#/$defs/internal-delay-ps" + required: - compatible - reg +$defs: + internal-delay-ps: + description: + Disable tunable delay lines using 0 ps, or enable them and select + the phase between 1640 ps (73.8 degree shift at 1Gbps) and 2260 ps + (101.7 degree shift) in increments of 0.9 degrees (20 ps). + enum: + [0, 1640, 1660, 1680, 1700, 1720, 1740, 1760, 1780, 1800, 1820, 1840, + 1860, 1880, 1900, 1920, 1940, 1960, 1980, 2000, 2020, 2040, 2060, 2080, + 2100, 2120, 2140, 2160, 2180, 2200, 2220, 2240, 2260] + unevaluatedProperties: false examples: @@ -97,30 +129,40 @@ examples: port@0 { phy-handle = <&rgmii_phy6>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <0>; }; port@1 { phy-handle = <&rgmii_phy3>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <1>; }; port@2 { phy-handle = <&rgmii_phy4>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <2>; }; port@3 { phy-handle = <&rgmii_phy4>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <3>; }; port@4 { ethernet = <&enet2>; phy-mode = "rgmii"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <4>; fixed-link { From 9ca482a246f017773c6ae9b39a02f51dfcc956d7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 18 Oct 2021 22:29:52 +0300 Subject: [PATCH 373/440] net: dsa: sja1105: parse {rx, tx}-internal-delay-ps properties for RGMII delays This change does not fix any functional issue or address any real life use case that wasn't possible before. It is just a small step in the process of standardizing the way in which Ethernet MAC drivers may apply RGMII delays (traditionally these have been applied by PHYs, with no clear definition of what to do in the case of a fixed-link). The sja1105 driver used to apply MAC-level RGMII delays on the RX data lines when in fixed-link mode and using a phy-mode of "rgmii-rxid" or "rgmii-id" and on the TX data lines when using "rgmii-txid" or "rgmii-id". But the standard definitions don't say anything about behaving differently when the port is in fixed-link vs when it isn't, and the new device tree bindings are about having a way of applying the delays in a way that is independent of the phy-mode and of the fixed-link property. When the {rx,tx}-internal-delay-ps properties are present, use them, otherwise fall back to the old behavior and warn. One other thing to note is that the SJA1105 hardware applies a delay value in degrees rather than in picoseconds (the delay in ps changes depending on the frequency of the RGMII clock - 125 MHz at 1G, 25 MHz at 100M, 2.5MHz at 10M). I assume that is fine, we calculate the phase shift of the internal delay lines assuming that the device tree meant gigabit, and we let the hardware scale those according to the link speed. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210723173108.459770-6-prasanna.vengateshan@microchip.com/ Link: https://patchwork.ozlabs.org/project/netdev/patch/20200616074955.GA9092@laureti-dev/#2461123 Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 25 +++++- drivers/net/dsa/sja1105/sja1105_clocking.c | 35 ++++---- drivers/net/dsa/sja1105/sja1105_main.c | 94 ++++++++++++++++------ 3 files changed, 107 insertions(+), 47 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 618c8d6a8be1..808419f3b808 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -20,6 +20,27 @@ #define SJA1105_AGEING_TIME_MS(ms) ((ms) / 10) #define SJA1105_NUM_L2_POLICERS SJA1110_MAX_L2_POLICING_COUNT +/* Calculated assuming 1Gbps, where the clock has 125 MHz (8 ns period) + * To avoid floating point operations, we'll multiply the degrees by 10 + * to get a "phase" and get 1 decimal point precision. + */ +#define SJA1105_RGMII_DELAY_PS_TO_PHASE(ps) \ + (((ps) * 360) / 800) +#define SJA1105_RGMII_DELAY_PHASE_TO_PS(phase) \ + ((800 * (phase)) / 360) +#define SJA1105_RGMII_DELAY_PHASE_TO_HW(phase) \ + (((phase) - 738) / 9) +#define SJA1105_RGMII_DELAY_PS_TO_HW(ps) \ + SJA1105_RGMII_DELAY_PHASE_TO_HW(SJA1105_RGMII_DELAY_PS_TO_PHASE(ps)) + +/* Valid range in degrees is a value between 73.8 and 101.7 + * in 0.9 degree increments + */ +#define SJA1105_RGMII_DELAY_MIN_PS \ + SJA1105_RGMII_DELAY_PHASE_TO_PS(738) +#define SJA1105_RGMII_DELAY_MAX_PS \ + SJA1105_RGMII_DELAY_PHASE_TO_PS(1017) + typedef enum { SPI_READ = 0, SPI_WRITE = 1, @@ -222,8 +243,8 @@ struct sja1105_flow_block { struct sja1105_private { struct sja1105_static_config static_config; - bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS]; - bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS]; + int rgmii_rx_delay_ps[SJA1105_MAX_NUM_PORTS]; + int rgmii_tx_delay_ps[SJA1105_MAX_NUM_PORTS]; phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; unsigned long ucast_egress_floods; diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 5bbf1707f2af..e3699f76f6d7 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -498,17 +498,6 @@ sja1110_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd, sja1105_packing(buf, &cmd->txc_pd, 0, 0, size, op); } -/* Valid range in degrees is an integer between 73.8 and 101.7 */ -static u64 sja1105_rgmii_delay(u64 phase) -{ - /* UM11040.pdf: The delay in degree phase is 73.8 + delay_tune * 0.9. - * To avoid floating point operations we'll multiply by 10 - * and get 1 decimal point precision. - */ - phase *= 10; - return (phase - 738) / 9; -} - /* The RGMII delay setup procedure is 2-step and gets called upon each * .phylink_mac_config. Both are strategic. * The reason is that the RX Tunable Delay Line of the SJA1105 MAC has issues @@ -521,13 +510,15 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) const struct sja1105_private *priv = ctx; const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cfg_pad_mii_id pad_mii_id = {0}; + int rx_delay = priv->rgmii_rx_delay_ps[port]; + int tx_delay = priv->rgmii_tx_delay_ps[port]; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; int rc; - if (priv->rgmii_rx_delay[port]) - pad_mii_id.rxc_delay = sja1105_rgmii_delay(90); - if (priv->rgmii_tx_delay[port]) - pad_mii_id.txc_delay = sja1105_rgmii_delay(90); + if (rx_delay) + pad_mii_id.rxc_delay = SJA1105_RGMII_DELAY_PS_TO_HW(rx_delay); + if (tx_delay) + pad_mii_id.txc_delay = SJA1105_RGMII_DELAY_PS_TO_HW(tx_delay); /* Stage 1: Turn the RGMII delay lines off. */ pad_mii_id.rxc_bypass = 1; @@ -542,11 +533,11 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port) return rc; /* Stage 2: Turn the RGMII delay lines on. */ - if (priv->rgmii_rx_delay[port]) { + if (rx_delay) { pad_mii_id.rxc_bypass = 0; pad_mii_id.rxc_pd = 0; } - if (priv->rgmii_tx_delay[port]) { + if (tx_delay) { pad_mii_id.txc_bypass = 0; pad_mii_id.txc_pd = 0; } @@ -561,20 +552,22 @@ int sja1110_setup_rgmii_delay(const void *ctx, int port) const struct sja1105_private *priv = ctx; const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cfg_pad_mii_id pad_mii_id = {0}; + int rx_delay = priv->rgmii_rx_delay_ps[port]; + int tx_delay = priv->rgmii_tx_delay_ps[port]; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; pad_mii_id.rxc_pd = 1; pad_mii_id.txc_pd = 1; - if (priv->rgmii_rx_delay[port]) { - pad_mii_id.rxc_delay = sja1105_rgmii_delay(90); + if (rx_delay) { + pad_mii_id.rxc_delay = SJA1105_RGMII_DELAY_PS_TO_HW(rx_delay); /* The "BYPASS" bit in SJA1110 is actually a "don't bypass" */ pad_mii_id.rxc_bypass = 1; pad_mii_id.rxc_pd = 0; } - if (priv->rgmii_tx_delay[port]) { - pad_mii_id.txc_delay = sja1105_rgmii_delay(90); + if (tx_delay) { + pad_mii_id.txc_delay = SJA1105_RGMII_DELAY_PS_TO_HW(tx_delay); pad_mii_id.txc_bypass = 1; pad_mii_id.txc_pd = 0; } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 0f1bba0076a8..1832d4bd3440 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1109,27 +1109,78 @@ static int sja1105_static_config_load(struct sja1105_private *priv) return sja1105_static_config_upload(priv); } -static int sja1105_parse_rgmii_delays(struct sja1105_private *priv) +/* This is the "new way" for a MAC driver to configure its RGMII delay lines, + * based on the explicit "rx-internal-delay-ps" and "tx-internal-delay-ps" + * properties. It has the advantage of working with fixed links and with PHYs + * that apply RGMII delays too, and the MAC driver needs not perform any + * special checks. + * + * Previously we were acting upon the "phy-mode" property when we were + * operating in fixed-link, basically acting as a PHY, but with a reversed + * interpretation: PHY_INTERFACE_MODE_RGMII_TXID means that the MAC should + * behave as if it is connected to a PHY which has applied RGMII delays in the + * TX direction. So if anything, RX delays should have been added by the MAC, + * but we were adding TX delays. + * + * If the "{rx,tx}-internal-delay-ps" properties are not specified, we fall + * back to the legacy behavior and apply delays on fixed-link ports based on + * the reverse interpretation of the phy-mode. This is a deviation from the + * expected default behavior which is to simply apply no delays. To achieve + * that behavior with the new bindings, it is mandatory to specify + * "{rx,tx}-internal-delay-ps" with a value of 0. + */ +static int sja1105_parse_rgmii_delays(struct sja1105_private *priv, int port, + struct device_node *port_dn) { - struct dsa_switch *ds = priv->ds; - int port; + phy_interface_t phy_mode = priv->phy_mode[port]; + struct device *dev = &priv->spidev->dev; + int rx_delay = -1, tx_delay = -1; - for (port = 0; port < ds->num_ports; port++) { - if (!priv->fixed_link[port]) - continue; + if (!phy_interface_mode_is_rgmii(phy_mode)) + return 0; - if (priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_RXID || - priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_ID) - priv->rgmii_rx_delay[port] = true; + of_property_read_u32(port_dn, "rx-internal-delay-ps", &rx_delay); + of_property_read_u32(port_dn, "tx-internal-delay-ps", &tx_delay); - if (priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_TXID || - priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_ID) - priv->rgmii_tx_delay[port] = true; + if (rx_delay == -1 && tx_delay == -1 && priv->fixed_link[port]) { + dev_warn(dev, + "Port %d interpreting RGMII delay settings based on \"phy-mode\" property, " + "please update device tree to specify \"rx-internal-delay-ps\" and " + "\"tx-internal-delay-ps\"", + port); - if ((priv->rgmii_rx_delay[port] || priv->rgmii_tx_delay[port]) && - !priv->info->setup_rgmii_delay) - return -EINVAL; + if (phy_mode == PHY_INTERFACE_MODE_RGMII_RXID || + phy_mode == PHY_INTERFACE_MODE_RGMII_ID) + rx_delay = 2000; + + if (phy_mode == PHY_INTERFACE_MODE_RGMII_TXID || + phy_mode == PHY_INTERFACE_MODE_RGMII_ID) + tx_delay = 2000; } + + if (rx_delay < 0) + rx_delay = 0; + if (tx_delay < 0) + tx_delay = 0; + + if ((rx_delay || tx_delay) && !priv->info->setup_rgmii_delay) { + dev_err(dev, "Chip cannot apply RGMII delays\n"); + return -EINVAL; + } + + if ((rx_delay && rx_delay < SJA1105_RGMII_DELAY_MIN_PS) || + (tx_delay && tx_delay < SJA1105_RGMII_DELAY_MIN_PS) || + (rx_delay > SJA1105_RGMII_DELAY_MAX_PS) || + (tx_delay > SJA1105_RGMII_DELAY_MAX_PS)) { + dev_err(dev, + "port %d RGMII delay values out of range, must be between %d and %d ps\n", + port, SJA1105_RGMII_DELAY_MIN_PS, SJA1105_RGMII_DELAY_MAX_PS); + return -ERANGE; + } + + priv->rgmii_rx_delay_ps[port] = rx_delay; + priv->rgmii_tx_delay_ps[port] = tx_delay; + return 0; } @@ -1180,6 +1231,10 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv, } priv->phy_mode[index] = phy_mode; + + err = sja1105_parse_rgmii_delays(priv, index, child); + if (err) + return err; } return 0; @@ -3317,15 +3372,6 @@ static int sja1105_probe(struct spi_device *spi) return rc; } - /* Error out early if internal delays are required through DT - * and we can't apply them. - */ - rc = sja1105_parse_rgmii_delays(priv); - if (rc < 0) { - dev_err(ds->dev, "RGMII delay not supported\n"); - return rc; - } - if (IS_ENABLED(CONFIG_NET_SCH_CBS)) { priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers, sizeof(struct sja1105_cbs_entry), From 414c6a3c84d7f0633d2b37de061b35387c951304 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:06 -0700 Subject: [PATCH 374/440] ethernet: netsec: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Acked-by: Ilias Apalodimas Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/netsec.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index baa9f5d1c549..de7d8bf2c226 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -2037,13 +2037,15 @@ static int netsec_probe(struct platform_device *pdev) if (ret && priv->eeprom_base) { void __iomem *macp = priv->eeprom_base + NETSEC_EEPROM_MAC_ADDRESS; + u8 addr[ETH_ALEN]; - ndev->dev_addr[0] = readb(macp + 3); - ndev->dev_addr[1] = readb(macp + 2); - ndev->dev_addr[2] = readb(macp + 1); - ndev->dev_addr[3] = readb(macp + 0); - ndev->dev_addr[4] = readb(macp + 7); - ndev->dev_addr[5] = readb(macp + 6); + addr[0] = readb(macp + 3); + addr[1] = readb(macp + 2); + addr[2] = readb(macp + 1); + addr[3] = readb(macp + 0); + addr[4] = readb(macp + 7); + addr[5] = readb(macp + 6); + eth_hw_addr_set(ndev, addr); } if (!is_valid_ether_addr(ndev->dev_addr)) { From 7f9b8fe5445cc3644a42c085421b3f123d78cf6e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:07 -0700 Subject: [PATCH 375/440] ethernet: stmmac: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b720539ccb0a..88d3053d2f87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2818,9 +2818,13 @@ static int stmmac_get_hw_features(struct stmmac_priv *priv) */ static void stmmac_check_ether_addr(struct stmmac_priv *priv) { + u8 addr[ETH_ALEN]; + if (!is_valid_ether_addr(priv->dev->dev_addr)) { - stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0); - if (!is_valid_ether_addr(priv->dev->dev_addr)) + stmmac_get_umac_addr(priv, priv->hw, addr, 0); + if (is_valid_ether_addr(addr)) + eth_hw_addr_set(priv->dev, addr); + else eth_hw_addr_random(priv->dev); dev_info(priv->device, "device MAC address %pM\n", priv->dev->dev_addr); From 3d9c64ca52d5a94a49df900e53ff1e60895720dd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:08 -0700 Subject: [PATCH 376/440] ethernet: tehuti: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Break the address up into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/tehuti/tehuti.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 3e8a3fde8302..0775a5542f2f 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -840,6 +840,7 @@ static int bdx_set_mac(struct net_device *ndev, void *p) static int bdx_read_mac(struct bdx_priv *priv) { u16 macAddress[3], i; + u8 addr[ETH_ALEN]; ENTER; macAddress[2] = READ_REG(priv, regUNC_MAC0_A); @@ -849,9 +850,10 @@ static int bdx_read_mac(struct bdx_priv *priv) macAddress[0] = READ_REG(priv, regUNC_MAC2_A); macAddress[0] = READ_REG(priv, regUNC_MAC2_A); for (i = 0; i < 3; i++) { - priv->ndev->dev_addr[i * 2 + 1] = macAddress[i]; - priv->ndev->dev_addr[i * 2] = macAddress[i] >> 8; + addr[i * 2 + 1] = macAddress[i]; + addr[i * 2] = macAddress[i] >> 8; } + eth_hw_addr_set(priv->ndev, addr); RET(0); } From 41a19eb084f0466f667c5784feed503edcf95412 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:09 -0700 Subject: [PATCH 377/440] ethernet: tlan: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, do the swapping, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/tlan.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index eab7d78d7c72..741c42c6a417 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -817,6 +817,7 @@ static int tlan_init(struct net_device *dev) int err; int i; struct tlan_priv *priv; + u8 addr[ETH_ALEN]; priv = netdev_priv(dev); @@ -842,7 +843,7 @@ static int tlan_init(struct net_device *dev) for (i = 0; i < ETH_ALEN; i++) err |= tlan_ee_read_byte(dev, (u8) priv->adapter->addr_ofs + i, - (u8 *) &dev->dev_addr[i]); + addr + i); if (err) { pr_err("%s: Error reading MAC from eeprom: %d\n", dev->name, err); @@ -850,11 +851,12 @@ static int tlan_init(struct net_device *dev) /* Olicom OC-2325/OC-2326 have the address byte-swapped */ if (priv->adapter->addr_ofs == 0xf8) { for (i = 0; i < ETH_ALEN; i += 2) { - char tmp = dev->dev_addr[i]; - dev->dev_addr[i] = dev->dev_addr[i + 1]; - dev->dev_addr[i + 1] = tmp; + char tmp = addr[i]; + addr[i] = addr[i + 1]; + addr[i + 1] = tmp; } } + eth_hw_addr_set(dev, addr); netif_carrier_off(dev); From 83f262babddeca1a7c71477402b778ea0171fa9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:10 -0700 Subject: [PATCH 378/440] ethernet: via-rhine: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 3b73a9c55a5a..509c5e9b29df 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -899,6 +899,7 @@ static int rhine_init_one_common(struct device *hwdev, u32 quirks, struct net_device *dev; struct rhine_private *rp; int i, rc, phy_id; + u8 addr[ETH_ALEN]; const char *name; /* this should always be supported */ @@ -933,7 +934,8 @@ static int rhine_init_one_common(struct device *hwdev, u32 quirks, rhine_hw_init(dev, pioaddr); for (i = 0; i < 6; i++) - dev->dev_addr[i] = ioread8(ioaddr + StationAddr + i); + addr[i] = ioread8(ioaddr + StationAddr + i); + eth_hw_addr_set(dev, addr); if (!is_valid_ether_addr(dev->dev_addr)) { /* Report it and use a random ethernet address instead */ From 0b271c48d9c5ca999ca6174a0e9a260c6c312f10 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 08:00:11 -0700 Subject: [PATCH 379/440] ethernet: via-velocity: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Read the address into an array on the stack, then call eth_hw_addr_set(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-velocity.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 4b9c30f735b5..be2b992f24d9 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2767,6 +2767,7 @@ static int velocity_probe(struct device *dev, int irq, struct velocity_info *vptr; struct mac_regs __iomem *regs; int ret = -ENOMEM; + u8 addr[ETH_ALEN]; /* FIXME: this driver, like almost all other ethernet drivers, * can support more than MAX_UNITS. @@ -2820,7 +2821,8 @@ static int velocity_probe(struct device *dev, int irq, mac_wol_reset(regs); for (i = 0; i < 6; i++) - netdev->dev_addr[i] = readb(®s->PAR[i]); + addr[i] = readb(®s->PAR[i]); + eth_hw_addr_set(netdev, addr); velocity_get_options(&vptr->options, velocity_nics); From 040e926f5813a5f4cc18dbff7c942d1e52f368f2 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Tue, 19 Oct 2021 02:08:50 +0200 Subject: [PATCH 380/440] net: dsa: qca8k: tidy for loop in setup and add cpu port check Tidy and organize qca8k setup function from multiple for loop. Change for loop in bridge leave/join to scan all port and skip cpu port. No functional change intended. Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 74 ++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 0992790da6b6..ea7f12778922 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1122,28 +1122,34 @@ qca8k_setup(struct dsa_switch *ds) if (ret) dev_warn(priv->dev, "mib init failed"); - /* Enable QCA header mode on the cpu port */ - ret = qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(cpu_port), - QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | - QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); - if (ret) { - dev_err(priv->dev, "failed enabling QCA header mode"); - return ret; - } - - /* Disable forwarding by default on all ports */ + /* Initial setup of all ports */ for (i = 0; i < QCA8K_NUM_PORTS; i++) { + /* Disable forwarding by default on all ports */ ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), QCA8K_PORT_LOOKUP_MEMBER, 0); if (ret) return ret; + + /* Enable QCA header mode on all cpu ports */ + if (dsa_is_cpu_port(ds, i)) { + ret = qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(i), + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); + if (ret) { + dev_err(priv->dev, "failed enabling QCA header mode"); + return ret; + } + } + + /* Disable MAC by default on all user ports */ + if (dsa_is_user_port(ds, i)) + qca8k_port_set_status(priv, i, 0); } - /* Disable MAC by default on all ports */ - for (i = 1; i < QCA8K_NUM_PORTS; i++) - qca8k_port_set_status(priv, i, 0); - - /* Forward all unknown frames to CPU port for Linux processing */ + /* Forward all unknown frames to CPU port for Linux processing + * Notice that in multi-cpu config only one port should be set + * for igmp, unknown, multicast and broadcast packet + */ ret = qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1, BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | BIT(cpu_port) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | @@ -1152,11 +1158,13 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; - /* Setup connection between CPU port & user ports */ + /* Setup connection between CPU port & user ports + * Configure specific switch configuration for ports + */ for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { - ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(cpu_port), + ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), QCA8K_PORT_LOOKUP_MEMBER, dsa_user_ports(ds)); if (ret) return ret; @@ -1193,16 +1201,14 @@ qca8k_setup(struct dsa_switch *ds) if (ret) return ret; } - } - /* The port 5 of the qca8337 have some problem in flood condition. The - * original legacy driver had some specific buffer and priority settings - * for the different port suggested by the QCA switch team. Add this - * missing settings to improve switch stability under load condition. - * This problem is limited to qca8337 and other qca8k switch are not affected. - */ - if (priv->switch_id == QCA8K_ID_QCA8337) { - for (i = 0; i < QCA8K_NUM_PORTS; i++) { + /* The port 5 of the qca8337 have some problem in flood condition. The + * original legacy driver had some specific buffer and priority settings + * for the different port suggested by the QCA switch team. Add this + * missing settings to improve switch stability under load condition. + * This problem is limited to qca8337 and other qca8k switch are not affected. + */ + if (priv->switch_id == QCA8K_ID_QCA8337) { switch (i) { /* The 2 CPU port and port 5 requires some different * priority than any other ports. @@ -1238,6 +1244,12 @@ qca8k_setup(struct dsa_switch *ds) QCA8K_PORT_HOL_CTRL1_WRED_EN, mask); } + + /* Set initial MTU for every port. + * We have only have a general MTU setting. So track + * every port and set the max across all port. + */ + priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN; } /* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */ @@ -1251,8 +1263,6 @@ qca8k_setup(struct dsa_switch *ds) } /* Setup our port MTUs to match power on defaults */ - for (i = 0; i < QCA8K_NUM_PORTS; i++) - priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN; ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, ETH_FRAME_LEN + ETH_FCS_LEN); if (ret) dev_warn(priv->dev, "failed setting MTU settings"); @@ -1728,7 +1738,9 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; port_mask = BIT(cpu_port); - for (i = 1; i < QCA8K_NUM_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (dsa_is_cpu_port(ds, i)) + continue; if (dsa_to_port(ds, i)->bridge_dev != br) continue; /* Add this port to the portvlan mask of the other ports @@ -1758,7 +1770,9 @@ qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - for (i = 1; i < QCA8K_NUM_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (dsa_is_cpu_port(ds, i)) + continue; if (dsa_to_port(ds, i)->bridge_dev != br) continue; /* Remove this port to the portvlan mask of the other ports From 818a76a55d6e9e7ec7568ae580390e941da84b62 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 19 Oct 2021 06:19:25 -0600 Subject: [PATCH 381/440] soc: fsl: dpio: Unsigned compared against 0 in qbman_swp_set_irq_coalescing() Coverity complains of unsigned compare against 0. There are 2 cases in this function: 1821 itp = (irq_holdoff * 1000) / p->desc->qman_256_cycles_per_ns; CID 121131 (#1 of 1): Unsigned compared against 0 (NO_EFFECT) unsigned_compare: This less-than-zero comparison of an unsigned value is never true. itp < 0U. 1822 if (itp < 0 || itp > 4096) { 1823 max_holdoff = (p->desc->qman_256_cycles_per_ns * 4096) / 1000; 1824 pr_err("irq_holdoff must be between 0..%dus\n", max_holdoff); 1825 return -EINVAL; 1826 } 1827 unsigned_compare: This less-than-zero comparison of an unsigned value is never true. irq_threshold < 0U. 1828 if (irq_threshold >= p->dqrr.dqrr_size || irq_threshold < 0) { 1829 pr_err("irq_threshold must be between 0..%d\n", 1830 p->dqrr.dqrr_size - 1); 1831 return -EINVAL; 1832 } Fix this by removing the comparisons altogether as they are incorrect. Zero is a possible value in either case. Also fix a minor comment typo and update the 2 pr_err() calls to use %u formatting as well as be more precise regarding the exact error. Fixes: ed1d2143fee5 ("soc: fsl: dpio: add support for irq coalescing per software portal") Cc: Ioana Ciornei Cc: Roy Pledge Cc: Li Yang Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Signed-off-by: Tim Gardner Tested-by: Ioana Ciornei Reviewed-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/soc/fsl/dpio/qbman-portal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index d3c58df6240d..3474bf5f88d5 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -1816,18 +1816,17 @@ int qbman_swp_set_irq_coalescing(struct qbman_swp *p, u32 irq_threshold, u32 itp, max_holdoff; /* Convert irq_holdoff value from usecs to 256 QBMAN clock cycles - * increments. This depends to the QBMAN internal frequency. + * increments. This depends on the QBMAN internal frequency. */ itp = (irq_holdoff * 1000) / p->desc->qman_256_cycles_per_ns; - if (itp < 0 || itp > 4096) { + if (itp > 4096) { max_holdoff = (p->desc->qman_256_cycles_per_ns * 4096) / 1000; - pr_err("irq_holdoff must be between 0..%dus\n", max_holdoff); + pr_err("irq_holdoff must be <= %uus\n", max_holdoff); return -EINVAL; } - if (irq_threshold >= p->dqrr.dqrr_size || irq_threshold < 0) { - pr_err("irq_threshold must be between 0..%d\n", - p->dqrr.dqrr_size - 1); + if (irq_threshold >= p->dqrr.dqrr_size) { + pr_err("irq_threshold must be < %u\n", p->dqrr.dqrr_size - 1); return -EINVAL; } From 47ce5f1e3e4e87f141310c975497a4de87f49ab9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:30:07 -0700 Subject: [PATCH 382/440] batman-adv: prepare for const netdev->dev_addr netdev->dev_addr will be constant soon, make sure the qualifier is propagated thru batman-adv. Signed-off-by: Jakub Kicinski Acked-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/bridge_loop_avoidance.c | 14 +++++++------- net/batman-adv/routing.c | 3 ++- net/batman-adv/tp_meter.c | 2 +- net/batman-adv/tvlv.c | 4 ++-- net/batman-adv/tvlv.h | 4 ++-- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1669744304c5..7242b32fff80 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -254,7 +254,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, * Return: backbone gateway if found or NULL otherwise */ static struct batadv_bla_backbone_gw * -batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, +batadv_backbone_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; @@ -336,7 +336,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ -static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, +static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, unsigned short vid, int claimtype) { struct sk_buff *skb; @@ -488,7 +488,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) * Return: the (possibly created) backbone gateway or NULL on error */ static struct batadv_bla_backbone_gw * -batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, +batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; @@ -926,7 +926,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, */ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, + const u8 *backbone_addr, const u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -964,7 +964,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, */ static bool batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, + const u8 *backbone_addr, const u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -2126,7 +2126,7 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_claim *claim) { - u8 *primary_addr = primary_if->net_dev->dev_addr; + const u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; void *hdr; @@ -2294,7 +2294,7 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_backbone_gw *backbone_gw) { - u8 *primary_addr = primary_if->net_dev->dev_addr; + const u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; int msecs; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 970d0d7ccc98..83f31494ea4d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -747,7 +747,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_hard_iface *primary_if = NULL; bool ret = false; - u8 *orig_addr, orig_ttvn; + const u8 *orig_addr; + u8 orig_ttvn; if (batadv_is_my_client(bat_priv, dst_addr, vid)) { primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 56b9fe97b3b4..fbcb15c7c29b 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -631,9 +631,9 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node = NULL; const struct batadv_icmp_tp_packet *icmp; struct batadv_tp_vars *tp_vars; + const unsigned char *dev_addr; size_t packet_len, mss; u32 rtt, recv_ack, cwnd; - unsigned char *dev_addr; packet_len = BATADV_TP_PLEN; mss = BATADV_TP_PLEN; diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 992773376e51..0cb58eb04093 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -587,8 +587,8 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, + const u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len) { struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index 54f2a35653d0..4cf8af00fc11 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -42,8 +42,8 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, u8 *src, u8 *dst, void *tvlv_buff, u16 tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, + const u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); #endif /* _NET_BATMAN_ADV_TVLV_H_ */ From 659f4e02f15aa0ab61555717978219f87505ff5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:36:05 -0700 Subject: [PATCH 383/440] mac802154: use dev_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/mac802154/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 323d3d2d986f..3210dec64a6a 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -129,7 +129,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + dev_addr_set(dev, addr->sa_data); sdata->wpan_dev.extended_addr = extended_addr; /* update lowpan interface mac address when From 08bb7516e53038a9f6609f621024b047de4c2d3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:36:06 -0700 Subject: [PATCH 384/440] mac802154: use dev_addr_set() - manual Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ieee802154/6lowpan/core.c | 2 +- net/mac802154/iface.c | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 3297e7fa9945..2cf62718a282 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -157,7 +157,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ - memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); + __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom * for encryption/fcs handling. The lowpan interface will replace * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 3210dec64a6a..500ed1b81250 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -136,8 +136,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) * wpan mac has been changed */ if (sdata->wpan_dev.lowpan_dev) - memcpy(sdata->wpan_dev.lowpan_dev->dev_addr, dev->dev_addr, - dev->addr_len); + dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr); return mac802154_wpan_update_llsec(dev); } @@ -615,6 +614,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { + u8 addr[IEEE802154_EXTENDED_ADDR_LEN]; struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; int ret; @@ -638,11 +638,12 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, switch (type) { case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; - if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) - ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); - else - memcpy(ndev->dev_addr, ndev->perm_addr, - IEEE802154_EXTENDED_ADDR_LEN); + if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { + ieee802154_le64_to_be64(addr, &extended_addr); + dev_addr_set(ndev, addr); + } else { + dev_addr_set(ndev, ndev->perm_addr); + } break; case NL802154_IFTYPE_MONITOR: ndev->type = ARPHRD_IEEE802154_MONITOR; From 0f00e70ef645e43527a1b622721ae084149c21ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:39:27 -0700 Subject: [PATCH 385/440] batman-adv: use eth_hw_addr_set() instead of ether_addr_copy() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Convert batman from ether_addr_copy() to eth_hw_addr_set(): @@ expression dev, np; @@ - ether_addr_copy(dev->dev_addr, np) + eth_hw_addr_set(dev, np) Signed-off-by: Jakub Kicinski Acked-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/soft-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0604b0279573..7ee09337fc40 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -134,7 +134,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return -EADDRNOTAVAIL; ether_addr_copy(old_addr, dev->dev_addr); - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) From 5b92be649605504e1019a1ad0c95b0d74a4e2be1 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 19 Oct 2021 09:42:28 -0700 Subject: [PATCH 386/440] net-core: use netdev_* calls for kernel messages While loading a driver and changing the number of queues, I noticed this message in the kernel log: "[253489.070080] Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!" But I had no idea what interface was being talked about because this message used pr_warn(). After investigating, it appears we can use the netdev_* helpers already defined to create predictably formatted messages, and that already handle cases, in more of the messages in dev.c. After this change, this message (and others) will look like this: "[ 170.181093] ice 0000:3b:00.0 ens785f0: Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!" One goal here was not to change the message significantly from the original format so as to not break user's expectations, so I just changed messages that used pr_* and generally started with %s == dev->name. Signed-off-by: Jesse Brandeburg Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 26cdf971ca95..4e3d19a06de4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1296,8 +1296,8 @@ rollback: old_assign_type = NET_NAME_RENAMED; goto rollback; } else { - pr_err("%s: name change rollback failed: %d\n", - dev->name, ret); + netdev_err(dev, "name change rollback failed: %d\n", + ret); } } @@ -2351,7 +2351,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) /* If TC0 is invalidated disable TC mapping */ if (tc->offset + tc->count > txq) { - pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); + netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); dev->num_tc = 0; return; } @@ -2362,8 +2362,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) tc = &dev->tc_to_txq[q]; if (tc->offset + tc->count > txq) { - pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", - i, q); + netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", + i, q); netdev_set_prio_tc_map(dev, i, 0); } } @@ -3416,7 +3416,7 @@ EXPORT_SYMBOL(__skb_gso_segment); #ifdef CONFIG_BUG static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { - pr_err("%s: hw csum failure\n", dev ? dev->name : ""); + netdev_err(dev, "hw csum failure\n"); skb_dump(KERN_ERR, skb, true); dump_stack(); } @@ -7012,8 +7012,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) } if (unlikely(work > weight)) - pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", - n->poll, work, weight); + netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) return work; @@ -8567,8 +8567,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_PROMISC; else { dev->promiscuity -= inc; - pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", - dev->name); + netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n"); return -EOVERFLOW; } } @@ -8638,8 +8637,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_ALLMULTI; else { dev->allmulti -= inc; - pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", - dev->name); + netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n"); return -EOVERFLOW; } } From cd4bc63de774eee95e9bac26a565cd80e0fca421 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 19 Oct 2021 12:19:50 -0600 Subject: [PATCH 387/440] net: enetc: unmap DMA in enetc_send_cmd() Coverity complains of a possible dereference of a null return value. 5. returned_null: kzalloc returns NULL. [show details] 6. var_assigned: Assigning: si_data = NULL return value from kzalloc. 488 si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); 489 cbd.length = cpu_to_le16(data_size); 490 491 dma = dma_map_single(&priv->si->pdev->dev, si_data, 492 data_size, DMA_FROM_DEVICE); While this kzalloc() is unlikely to fail, I did notice that the function returned without unmapping si_data. Fix this by refactoring the error paths and checking for kzalloc() failure. Fixes: 888ae5a3952ba ("net: enetc: add tc flower psfp offload driver") Cc: Claudiu Manoil Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org (open list) Signed-off-by: Tim Gardner Acked-by: Claudiu Manoil Signed-off-by: David S. Miller --- .../net/ethernet/freescale/enetc/enetc_qos.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 4577226d3c6a..0536d2c76fbc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -486,14 +486,16 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, data_size = sizeof(struct streamid_data); si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); + if (!si_data) + return -ENOMEM; cbd.length = cpu_to_le16(data_size); dma = dma_map_single(&priv->si->pdev->dev, si_data, data_size, DMA_FROM_DEVICE); if (dma_mapping_error(&priv->si->pdev->dev, dma)) { netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(si_data); - return -ENOMEM; + err = -ENOMEM; + goto out; } cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); @@ -512,12 +514,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); if (err) - return -EINVAL; + goto out; - if (!enable) { - kfree(si_data); - return 0; - } + if (!enable) + goto out; /* Enable the entry overwrite again incase space flushed by hardware */ memset(&cbd, 0, sizeof(cbd)); @@ -560,6 +560,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, } err = enetc_send_cmd(priv->si, &cbd); +out: + if (!dma_mapping_error(&priv->si->pdev->dev, dma)) + dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); + kfree(si_data); return err; From 2641b62d2fab52648e34cdc6994b2eacde2d27c1 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 19 Oct 2021 21:16:47 +0200 Subject: [PATCH 388/440] phy: micrel: ksz8041nl: do not use power down mode Some Micrel KSZ8041NL PHY chips exhibit continuous RX errors after using the power down mode bit (0.11). If the PHY is taken out of power down mode in a certain temperature range, the PHY enters a weird state which leads to continuously reporting RX errors. In that state, the MAC is not able to receive or send any Ethernet frames and the activity LED is constantly blinking. Since Linux is using the suspend callback when the interface is taken down, ending up in that state can easily happen during a normal startup. Micrel confirmed the issue in errata DS80000700A [*], caused by abnormal clock recovery when using power down mode. Even the latest revision (A4, Revision ID 0x1513) seems to suffer that problem, and according to the errata is not going to be fixed. Remove the suspend/resume callback to avoid using the power down mode completely. [*] https://ww1.microchip.com/downloads/en/DeviceDoc/80000700A.pdf Fixes: 1a5465f5d6a2 ("phy/micrel: Add suspend/resume support to Micrel PHYs") Signed-off-by: Stefan Agner Acked-by: Marcel Ziswiler Signed-off-by: Francesco Dolcini Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ff452669130a..44a24b99c894 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1676,8 +1676,9 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + /* No suspend/resume callbacks because of errata DS80000700A, + * receiver error following software power down. + */ }, { .phy_id = PHY_ID_KSZ8041RNLI, .phy_id_mask = MICREL_PHY_ID_MASK, From dfcb63ce1de6b10ba98dee928f9463f37e5a5512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 19 Oct 2021 19:47:09 +0200 Subject: [PATCH 389/440] fq_codel: generalise ce_threshold marking for subset of traffic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e72aeb9ee0e3 ("fq_codel: implement L4S style ce_threshold_ect1 marking") expanded the ce_threshold feature of FQ-CoDel so it can be applied to a subset of the traffic, using the ECT(1) bit of the ECN field as the classifier. However, hard-coding ECT(1) as the only classifier for this feature seems limiting, so let's expand it to be more general. To this end, change the parameter from a ce_threshold_ect1 boolean, to a one-byte selector/mask pair (ce_threshold_{selector,mask}) which is applied to the whole diffserv/ECN field in the IP header. This makes it possible to classify packets by any value in either the ECN field or the diffserv field. In particular, setting a selector of INET_ECN_ECT_1 and a mask of INET_ECN_MASK corresponds to the functionality before this patch, and a mask of ~INET_ECN_MASK allows using the selector as a straight-forward match against a diffserv code point: # apply ce_threshold to ECT(1) traffic tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x1/0x3 # apply ce_threshold to ECN-capable traffic marked as diffserv AF22 tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x50/0xfc Regardless of the selector chosen, the normal rules for ECN-marking of packets still apply, i.e., the flow must still declare itself ECN-capable by setting one of the bits in the ECN field to get marked at all. v2: - Add tc usage examples to patch description Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211019174709.69081-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/net/codel.h | 7 +++++-- include/net/codel_impl.h | 14 +++++++------- include/uapi/linux/pkt_sched.h | 3 ++- net/mac80211/sta_info.c | 3 ++- net/sched/sch_fq_codel.c | 13 +++++++++---- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index 5e8b181b76b8..a6c9e34e62b8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -102,7 +102,9 @@ static inline u32 codel_time_to_us(codel_time_t val) * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. * @ecn: is Explicit Congestion Notification enabled - * @ce_threshold_ect1: if ce_threshold only marks ECT(1) packets + * @ce_threshold_selector: apply ce_threshold to packets matching this value + * in the diffserv/ECN byte of the IP header + * @ce_threshold_mask: mask to apply to ce_threshold_selector comparison */ struct codel_params { codel_time_t target; @@ -110,7 +112,8 @@ struct codel_params { codel_time_t interval; u32 mtu; bool ecn; - bool ce_threshold_ect1; + u8 ce_threshold_selector; + u8 ce_threshold_mask; }; /** diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 7af2c3eb3c43..137d40d8cbeb 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -54,7 +54,8 @@ static void codel_params_init(struct codel_params *params) params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; - params->ce_threshold_ect1 = false; + params->ce_threshold_mask = 0; + params->ce_threshold_selector = 0; params->ecn = false; } @@ -250,13 +251,12 @@ end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { bool set_ce = true; - if (params->ce_threshold_ect1) { - /* Note: if skb_get_dsfield() returns -1, following - * gives INET_ECN_MASK, which is != INET_ECN_ECT_1. - */ - u8 ecn = skb_get_dsfield(skb) & INET_ECN_MASK; + if (params->ce_threshold_mask) { + int dsfield = skb_get_dsfield(skb); - set_ce = (ecn == INET_ECN_ECT_1); + set_ce = (dsfield >= 0 && + (((u8)dsfield & params->ce_threshold_mask) == + params->ce_threshold_selector)); } if (set_ce && INET_ECN_set_ce(skb)) stats->ce_mark++; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 6be9a84cccfa..f292b467b27f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -840,7 +840,8 @@ enum { TCA_FQ_CODEL_CE_THRESHOLD, TCA_FQ_CODEL_DROP_BATCH_SIZE, TCA_FQ_CODEL_MEMORY_LIMIT, - TCA_FQ_CODEL_CE_THRESHOLD_ECT1, + TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, + TCA_FQ_CODEL_CE_THRESHOLD_MASK, __TCA_FQ_CODEL_MAX }; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a39830418434..bd52ac3bee90 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -513,7 +513,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; - sta->cparams.ce_threshold_ect1 = false; + sta->cparams.ce_threshold_selector = 0; + sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 033d65d06eb1..839e1235db05 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -362,7 +362,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, - [TCA_FQ_CODEL_CE_THRESHOLD_ECT1] = { .type = NLA_U8 }, + [TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 }, + [TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, @@ -409,8 +410,10 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; } - if (tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]) - q->cparams.ce_threshold_ect1 = !!nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]); + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]) + q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]); + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]) + q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]); if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -552,7 +555,9 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, codel_time_to_us(q->cparams.ce_threshold))) goto nla_put_failure; - if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_ECT1, q->cparams.ce_threshold_ect1)) + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector)) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask)) goto nla_put_failure; } From 0754d65bd4be5bb7392aa59339a290c80346a93c Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:15 -0700 Subject: [PATCH 390/440] ice: Add infrastructure for mqprio support via ndo_setup_tc Add infrastructure required for "ndo_setup_tc:qdisc_mqprio". ice_vsi_setup is modified to configure traffic classes based on mqprio data received from the stack. This includes low-level functions to configure min, max rate-limit parameters in hardware for traffic classes. Each traffic class gets mapped to a hardware channel (VSI) which can be individually configured with different bandwidth parameters. Co-developed-by: Tarun Singh Signed-off-by: Tarun Singh Signed-off-by: Kiran Patil Signed-off-by: Amritha Nambiar Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 78 +++- drivers/net/ethernet/intel/ice/ice_base.c | 34 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 201 ++++++--- drivers/net/ethernet/intel/ice/ice_dcb_lib.h | 10 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 382 +++++++++++++++--- drivers/net/ethernet/intel/ice/ice_lib.h | 11 +- drivers/net/ethernet/intel/ice/ice_main.c | 6 +- drivers/net/ethernet/intel/ice/ice_sched.c | 68 +++- drivers/net/ethernet/intel/ice/ice_sched.h | 2 + drivers/net/ethernet/intel/ice/ice_txrx.h | 7 + drivers/net/ethernet/intel/ice/ice_type.h | 3 + .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 +- 13 files changed, 671 insertions(+), 137 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 80f14886b5b1..c2814e31c438 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,10 @@ #define ICE_INVAL_VFID 256 #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ + +#define ICE_CHNL_START_TC 1 +#define ICE_CHNL_MAX_TC 16 + #define ICE_MAX_RESET_WAIT 20 #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@ -145,6 +150,9 @@ #define ice_for_each_q_vector(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++) +#define ice_for_each_chnl_tc(i) \ + for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++) + #define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \ ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX) @@ -172,6 +180,21 @@ enum ice_feature { DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key); +struct ice_channel { + struct list_head list; + u8 type; + u16 sw_id; + u16 base_q; + u16 num_rxq; + u16 num_txq; + u16 vsi_num; + u8 ena_tc; + struct ice_aqc_vsi_props info; + u64 max_tx_rate; + u64 min_tx_rate; + struct ice_vsi *ch_vsi; +}; + struct ice_txq_meta { u32 q_teid; /* Tx-scheduler element identifier */ u16 q_id; /* Entry in VSI's txq_map bitmap */ @@ -189,7 +212,7 @@ struct ice_tc_info { struct ice_tc_cfg { u8 numtc; /* Total number of enabled TCs */ - u8 ena_tc; /* Tx map */ + u16 ena_tc; /* Tx map */ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; }; @@ -361,6 +384,34 @@ struct ice_vsi { struct net_device **target_netdevs; + struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */ + + /* Channel Specific Fields */ + struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC]; + u16 cnt_q_avail; + u16 next_base_q; /* next queue to be used for channel setup */ + struct list_head ch_list; + u16 num_chnl_rxq; + u16 num_chnl_txq; + u16 ch_rss_size; + /* store away rss size info before configuring ADQ channels so that, + * it can be used after tc-qdisc delete, to get back RSS setting as + * they were before + */ + u16 orig_rss_size; + /* this keeps tracks of all enabled TC with and without DCB + * and inclusive of ADQ, vsi->mqprio_opt keeps track of queue + * information + */ + u8 all_numtc; + u16 all_enatc; + + /* store away TC info, to be used for rebuild logic */ + u8 old_numtc; + u16 old_ena_tc; + + struct ice_channel *ch; + /* setup back reference, to which aggregator node this VSI * corresponds to */ @@ -407,6 +458,7 @@ enum ice_pf_flags { ICE_FLAG_PTP, /* PTP is enabled by software */ ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, + ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */ ICE_FLAG_CLS_FLOWER, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, @@ -704,6 +756,30 @@ static inline void ice_clear_sriov_cap(struct ice_pf *pf) ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) #define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx) +/** + * ice_is_adq_active - any active ADQs + * @pf: pointer to PF + * + * This function returns true if there are any ADQs configured (which is + * determined by looking at VSI type (which should be VSI_PF), numtc, and + * TC_MQPRIO flag) otherwise return false + */ +static inline bool ice_is_adq_active(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return false; + + /* is ADQ configured */ + if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return true; + + return false; +} + bool netif_is_ice(struct net_device *dev); int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index be625977addf..fa6cd63cbf1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -213,6 +213,9 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 { WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n"); + if (ring->ch) + return ring->q_index - ring->ch->base_q; + /* Idea here for calculation is that we subtract the number of queue * count from TC that ring belongs to from it's absolute queue index * and as a result we get the queue's index within TC. @@ -300,7 +303,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf case ICE_VSI_LB: case ICE_VSI_CTRL: case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + if (ring->ch) + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + else + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ @@ -315,7 +321,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf } /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + if (ring->ch) + tlan_ctx->src_vsi = ring->ch->vsi_num; + else + tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); /* Restrict Tx timestamps to the PF VSI */ switch (vsi->type) { @@ -747,6 +756,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 buf_len = struct_size(qg_buf, txqs, 1); struct ice_tlan_ctx tlan_ctx = { 0 }; struct ice_aqc_add_txqs_perq *txq; + struct ice_channel *ch = ring->ch; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; @@ -785,8 +795,14 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); } - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, - 1, qg_buf, buf_len, NULL); + if (ch) + status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, + ring->q_handle, 1, qg_buf, buf_len, + NULL); + else + status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, + ring->q_handle, 1, qg_buf, buf_len, + NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n", ice_stat_str(status)); @@ -967,6 +983,7 @@ void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { + struct ice_channel *ch = ring->ch; u8 tc; if (IS_ENABLED(CONFIG_DCB)) @@ -977,6 +994,11 @@ ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, txq_meta->q_id = ring->reg_idx; txq_meta->q_teid = ring->txq_teid; txq_meta->q_handle = ring->q_handle; - txq_meta->vsi_idx = vsi->idx; - txq_meta->tc = tc; + if (ch) { + txq_meta->vsi_idx = ch->ch_vsi->idx; + txq_meta->tc = 0; + } else { + txq_meta->vsi_idx = vsi->idx; + txq_meta->tc = tc; + } } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 4284526e9e24..a72e18320a22 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -4,53 +4,11 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" -/** - * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration - * @vsi: the VSI being configured - * @ena_tc: TC map to be enabled - */ -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) -{ - struct net_device *netdev = vsi->netdev; - struct ice_pf *pf = vsi->back; - struct ice_dcbx_cfg *dcbcfg; - u8 netdev_tc; - int i; - - if (!netdev) - return; - - if (!ena_tc) { - netdev_reset_tc(netdev); - return; - } - - if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) - return; - - dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - - ice_for_each_traffic_class(i) - if (vsi->tc_cfg.ena_tc & BIT(i)) - netdev_set_tc_queue(netdev, - vsi->tc_cfg.tc_info[i].netdev_tc, - vsi->tc_cfg.tc_info[i].qcount_tx, - vsi->tc_cfg.tc_info[i].qoffset); - - for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - u8 ets_tc = dcbcfg->etscfg.prio_table[i]; - - /* Get the mapped netdev TC# for the UP */ - netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; - netdev_set_prio_tc_map(netdev, i, netdev_tc); - } -} - /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs * @dcbcfg: DCB config to evaluate for enabled TCs */ -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) +static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) { u8 i, num_tc, ena_tc = 1; @@ -178,6 +136,67 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) return ret; } +/** + * ice_get_first_droptc - returns number of first droptc + * @vsi: used to find the first droptc + * + * This function returns the value of first_droptc. + * When DCB is enabled, first droptc information is derived from enabled_tc + * and PFC enabled bits. otherwise this function returns 0 as there is one + * TC without DCB (tc0) + */ +static u8 ice_get_first_droptc(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + struct device *dev = ice_pf_to_dev(vsi->back); + u8 num_tc, ena_tc_map, pfc_ena_map; + u8 i; + + num_tc = ice_dcb_get_num_tc(cfg); + + /* get bitmap of enabled TCs */ + ena_tc_map = ice_dcb_get_ena_tc(cfg); + + /* get bitmap of PFC enabled TCs */ + pfc_ena_map = cfg->pfc.pfcena; + + /* get first TC that is not PFC enabled */ + for (i = 0; i < num_tc; i++) { + if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) { + dev_dbg(dev, "first drop tc = %d\n", i); + return i; + } + } + + dev_dbg(dev, "first drop tc = 0\n"); + return 0; +} + +/** + * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration + * @vsi: pointer to the VSI instance + */ +void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + + switch (vsi->type) { + case ICE_VSI_PF: + vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); + vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + break; + case ICE_VSI_CHNL: + vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi)); + vsi->tc_cfg.numtc = 1; + break; + case ICE_VSI_CTRL: + case ICE_VSI_LB: + default: + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + } +} + /** * ice_dcb_get_tc - Get the TC associated with the queue * @vsi: ptr to the VSI @@ -218,11 +237,68 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) qoffset = vsi->tc_cfg.tc_info[n].qoffset; qcount = vsi->tc_cfg.tc_info[n].qcount_tx; - for (i = qoffset; i < (qoffset + qcount); i++) { - tx_ring = vsi->tx_rings[i]; - rx_ring = vsi->rx_rings[i]; - tx_ring->dcb_tc = n; - rx_ring->dcb_tc = n; + for (i = qoffset; i < (qoffset + qcount); i++) + vsi->tx_rings[i]->dcb_tc = n; + + qcount = vsi->tc_cfg.tc_info[n].qcount_rx; + for (i = qoffset; i < (qoffset + qcount); i++) + vsi->rx_rings[i]->dcb_tc = n; + } + /* applicable only if "all_enatc" is set, which will be set from + * setup_tc method as part of configuring channels + */ + if (vsi->all_enatc) { + u8 first_droptc = ice_get_first_droptc(vsi); + + /* When DCB is configured, TC for ADQ queues (which are really + * PF queues) should be the first drop TC of the main VSI + */ + ice_for_each_chnl_tc(n) { + if (!(vsi->all_enatc & BIT(n))) + break; + + qoffset = vsi->mqprio_qopt.qopt.offset[n]; + qcount = vsi->mqprio_qopt.qopt.count[n]; + for (i = qoffset; i < (qoffset + qcount); i++) { + vsi->tx_rings[i]->dcb_tc = first_droptc; + vsi->rx_rings[i]->dcb_tc = first_droptc; + } + } + } +} + +/** + * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig + * @pf: pointer to the PF instance + * @ena: true to enable VSIs, false to disable + * @locked: true if caller holds RTNL lock, false otherwise + * + * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV + * and CHNL need to be brought down. Following completion of DCB configuration + * the VSIs that were downed need to be brought up again. This helper function + * does both. + */ +static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked) +{ + int i; + + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; + + if (!vsi) + continue; + + switch (vsi->type) { + case ICE_VSI_CHNL: + case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_PF: + if (ena) + ice_ena_vsi(vsi, locked); + else + ice_dis_vsi(vsi, locked); + break; + default: + continue; } } } @@ -331,7 +407,9 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) */ if (!locked) rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); @@ -359,7 +437,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) ice_pf_dcb_recfg(pf); out: - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); if (!locked) rtnl_unlock(); free_cfg: @@ -674,6 +753,8 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) tc_map = ICE_DFLT_TRAFFIC_CLASS; ice_dcb_noncontig_cfg(pf); } + } else if (vsi->type == ICE_VSI_CHNL) { + tc_map = BIT(ice_get_first_droptc(vsi)); } else { tc_map = ICE_DFLT_TRAFFIC_CLASS; } @@ -684,10 +765,11 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) vsi->idx); continue; } - /* no need to proceed with remaining cfg if it is switchdev - * VSI + /* no need to proceed with remaining cfg if it is CHNL + * or switchdev VSI */ - if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) + if (vsi->type == ICE_VSI_CHNL || + vsi->type == ICE_VSI_SWITCHDEV_CTRL) continue; ice_vsi_map_rings_to_vectors(vsi); @@ -862,7 +944,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_dcbx_cfg tmp_dcbx_cfg; bool need_reconfig = false; struct ice_port_info *pi; - struct ice_vsi *pf_vsi; u8 mib_type; int ret; @@ -938,14 +1019,9 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } - pf_vsi = ice_get_main_vsi(pf); - if (!pf_vsi) { - dev_dbg(dev, "PF VSI doesn't exist\n"); - goto out; - } - rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, true); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { @@ -956,7 +1032,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); unlock_rtnl: rtnl_unlock(); out: diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 6700e97b3b51..4c421c842a13 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -16,7 +16,6 @@ void ice_dcb_rebuild(struct ice_pf *pf); int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked); -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi); bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue); @@ -34,8 +33,6 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); - /** * ice_find_q_in_range * @low: start of queue range for a TC i.e. offset of TC @@ -69,6 +66,12 @@ static inline u8 ice_get_pfc_mode(struct ice_pf *pf) #else static inline void ice_dcb_rebuild(struct ice_pf *pf) { } +static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; +} + static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) { return ICE_DFLT_TRAFFIC_CLASS; @@ -130,7 +133,6 @@ static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { } static inline void ice_update_dcb_stats(struct ice_pf *pf) { } static inline void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } -static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { } static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { } #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index a20c38446e76..6cb50653b18d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -329,7 +329,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) static struct ice_vsi * ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 231f8bea2519..62adbb39670b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -22,6 +22,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_VF"; case ICE_VSI_CTRL: return "ICE_VSI_CTRL"; + case ICE_VSI_CHNL: + return "ICE_VSI_CHNL"; case ICE_VSI_LB: return "ICE_VSI_LB"; case ICE_VSI_SWITCHDEV_CTRL: @@ -73,6 +75,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) struct device *dev; dev = ice_pf_to_dev(pf); + if (vsi->type == ICE_VSI_CHNL) + return 0; /* allocate memory for both Tx and Rx ring pointers */ vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq, @@ -229,6 +233,10 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; vsi->num_q_vectors = 1; break; + case ICE_VSI_CHNL: + vsi->alloc_txq = 0; + vsi->alloc_rxq = 0; + break; case ICE_VSI_LB: vsi->alloc_txq = 1; vsi->alloc_rxq = 1; @@ -274,7 +282,7 @@ static int ice_get_free_slot(void *array, int size, int curr) * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed */ -static void ice_vsi_delete(struct ice_vsi *vsi) +void ice_vsi_delete(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_vsi_ctx *ctxt; @@ -345,7 +353,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) * * Returns 0 on success, negative on failure */ -static int ice_vsi_clear(struct ice_vsi *vsi) +int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -438,12 +446,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI + * @ch: ptr to channel * @vf_id: ID of the VF being configured * * returns a pointer to a VSI on success, NULL on failure. */ static struct ice_vsi * -ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) +ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, + struct ice_channel *ch, u16 vf_id) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; @@ -470,7 +480,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); - else + else if (vsi_type != ICE_VSI_CHNL) ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); switch (vsi->type) { @@ -499,6 +509,13 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (ice_vsi_alloc_arrays(vsi)) goto err_rings; break; + case ICE_VSI_CHNL: + if (!ch) + goto err_rings; + vsi->num_rxq = ch->num_rxq; + vsi->num_txq = ch->num_txq; + vsi->next_base_q = ch->base_q; + break; case ICE_VSI_LB: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; @@ -615,6 +632,9 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) }; int ret; + if (vsi->type == ICE_VSI_CHNL) + return 0; + ret = __ice_vsi_get_qs(&tx_qs_cfg); if (ret) return ret; @@ -733,11 +753,16 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) cap = &pf->hw.func_caps.common_cap; switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ vsi->rss_table_size = (u16)cap->rss_table_size; - vsi->rss_size = min_t(u16, num_online_cpus(), - BIT(cap->rss_table_entry_width)); + if (vsi->type == ICE_VSI_CHNL) + vsi->rss_size = min_t(u16, vsi->num_rxq, + BIT(cap->rss_table_entry_width)); + else + vsi->rss_size = min_t(u16, num_online_cpus(), + BIT(cap->rss_table_entry_width)); vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; break; case ICE_VSI_SWITCHDEV_CTRL: @@ -814,21 +839,13 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - bool ena_tc0 = false; u8 netdev_tc = 0; int i; - /* at least TC0 should be enabled by default */ - if (vsi->tc_cfg.numtc) { - if (!(vsi->tc_cfg.ena_tc & BIT(0))) - ena_tc0 = true; - } else { - ena_tc0 = true; - } - - if (ena_tc0) { - vsi->tc_cfg.numtc++; - vsi->tc_cfg.ena_tc |= 1; + if (!vsi->tc_cfg.numtc) { + /* at least TC0 should be enabled by default */ + vsi->tc_cfg.numtc = 1; + vsi->tc_cfg.ena_tc = 1; } num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); @@ -970,6 +987,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; @@ -992,6 +1010,28 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } +static void +ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) +{ + struct ice_pf *pf = vsi->back; + u16 qcount, qmap; + u8 offset = 0; + int pow; + + qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); + + pow = order_base_2(qcount); + qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & + ICE_AQ_VSI_TC_Q_NUM_M); + + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q); + ctxt->info.q_mapping[1] = cpu_to_le16(qcount); +} + /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured @@ -1020,6 +1060,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_CHNL: ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; break; case ICE_VSI_VF: @@ -1032,6 +1073,21 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) goto out; } + /* Handle VLAN pruning for channel VSI if main VSI has VLAN + * prune enabled + */ + if (vsi->type == ICE_VSI_CHNL) { + struct ice_vsi *main_vsi; + + main_vsi = ice_get_main_vsi(pf); + if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi)) + ctxt->info.sw_flags2 |= + ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + else + ctxt->info.sw_flags2 &= + ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + } + ice_set_dflt_vsi_ctx(ctxt); if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) ice_set_fd_vsi_ctx(ctxt, vsi); @@ -1052,13 +1108,17 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) } ctxt->info.sw_id = vsi->port_info->sw_id; - ice_vsi_setup_q_map(vsi, ctxt); - if (!init_vsi) /* means VSI being updated */ - /* must to indicate which section of VSI context are - * being modified - */ - ctxt->info.valid_sections |= - cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + if (vsi->type == ICE_VSI_CHNL) { + ice_chnl_vsi_setup_q_map(vsi, ctxt); + } else { + ice_vsi_setup_q_map(vsi, ctxt); + if (!init_vsi) /* means VSI being updated */ + /* must to indicate which section of VSI context are + * being modified + */ + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + } /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off * respectively @@ -1237,6 +1297,8 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) /* SRIOV doesn't grab irq_tracker entries for each VSI */ if (vsi->type == ICE_VSI_VF) return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; if (vsi->base_vector) { dev_dbg(dev, "VSI %d has non-zero base vector %d\n", @@ -1403,7 +1465,7 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI * @vsi: VSI to be configured */ -static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct device *dev; @@ -1411,7 +1473,25 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) int err; dev = ice_pf_to_dev(pf); - vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size && + (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size); + } else { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + + /* If orig_rss_size is valid and it is less than determined + * main VSI's rss_size, update main VSI's rss_size to be + * orig_rss_size so that when tc-qdisc is deleted, main VSI + * RSS table gets programmed to be correct (whatever it was + * to begin with (prior to setup-tc for ADQ config) + */ + if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size && + vsi->orig_rss_size <= vsi->num_rxq) { + vsi->rss_size = vsi->orig_rss_size; + /* now orig_rss_size is used, reset it to zero */ + vsi->orig_rss_size = 0; + } + } lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) @@ -2260,10 +2340,14 @@ err_out: static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) { - struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + return; + } - vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); - vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + /* set VSI TC information based on DCB config */ + ice_vsi_set_dcb_tc_cfg(vsi); } /** @@ -2376,6 +2460,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_CTRL: + case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: case ICE_VSI_SWITCHDEV_CTRL: @@ -2475,6 +2560,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) * @vf_id: defines VF ID to which this VSI connects. This field is meant to be * used only for ICE_VSI_VF VSI type. For other VSI types, should * fill-in ICE_INVAL_VFID as input. + * @ch: ptr to channel * * This allocates the sw VSI structure and its queue resources. * @@ -2483,7 +2569,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) */ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id) + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = ice_pf_to_dev(pf); @@ -2491,10 +2577,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi *vsi; int ret, i; - if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) - vsi = ice_vsi_alloc(pf, vsi_type, vf_id); + if (vsi_type == ICE_VSI_CHNL) + vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID); + else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) + vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id); else - vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); @@ -2511,10 +2599,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_alloc_fd_res(vsi); - if (ice_vsi_get_qs(vsi)) { - dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", - vsi->idx); - goto unroll_vsi_alloc; + if (vsi_type != ICE_VSI_CHNL) { + if (ice_vsi_get_qs(vsi)) { + dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", + vsi->idx); + goto unroll_vsi_alloc; + } } /* set RSS capabilities */ @@ -2573,6 +2663,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } ice_init_arfs(vsi); break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } + break; case ICE_VSI_VF: /* VF driver will take care of creating netdev for this type and * map queues to vectors through Virtchnl, PF driver only @@ -2611,9 +2707,21 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = vsi->alloc_txq; + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) + continue; + if (vsi->type == ICE_VSI_CHNL) { + if (!vsi->alloc_txq && vsi->num_txq) + max_txqs[i] = vsi->num_txq; + else + max_txqs[i] = pf->num_lan_tx; + } else { + max_txqs[i] = vsi->alloc_txq; + } + } + + dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { @@ -3269,6 +3377,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } break; default: break; @@ -3276,14 +3390,30 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) /* configure VSI nodes based on number of queues and TC's */ for (i = 0; i < vsi->tc_cfg.numtc; i++) { - max_txqs[i] = vsi->alloc_txq; + /* configure VSI nodes based on number of queues and TC's. + * ADQ creates VSIs for each TC/Channel but doesn't + * allocate queues instead it reconfigures the PF queues + * as per the TC command. So max_txqs should point to the + * PF Tx queues. + */ + if (vtype == ICE_VSI_CHNL) + max_txqs[i] = pf->num_lan_tx; + else + max_txqs[i] = vsi->alloc_txq; if (ice_is_xdp_ena_vsi(vsi)) max_txqs[i] += vsi->num_xdp_txq; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + /* If MQPRIO is set, means channel code path, hence for main + * VSI's, use TC as 1 + */ + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); + if (status) { dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n", vsi->vsi_num, ice_stat_str(status)); @@ -3355,7 +3485,6 @@ int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) return 0; } -#ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map * @vsi: VSI being configured @@ -3370,6 +3499,146 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) sizeof(vsi->info.tc_mapping)); } +/** + * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration + * @vsi: the VSI being configured + * @ena_tc: TC map to be enabled + */ +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf = vsi->back; + int numtc = vsi->tc_cfg.numtc; + struct ice_dcbx_cfg *dcbcfg; + u8 netdev_tc; + int i; + + if (!netdev) + return; + + /* CHNL VSI doesn't have it's own netdev, hence, no netdev_tc */ + if (vsi->type == ICE_VSI_CHNL) + return; + + if (!ena_tc) { + netdev_reset_tc(netdev); + return; + } + + if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf)) + numtc = vsi->all_numtc; + + if (netdev_set_num_tc(netdev, numtc)) + return; + + dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + + ice_for_each_traffic_class(i) + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + vsi->tc_cfg.tc_info[i].qcount_tx, + vsi->tc_cfg.tc_info[i].qoffset); + /* setup TC queue map for CHNL TCs */ + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + break; + if (!vsi->mqprio_qopt.qopt.count[i]) + break; + netdev_set_tc_queue(netdev, i, + vsi->mqprio_qopt.qopt.count[i], + vsi->mqprio_qopt.qopt.offset[i]); + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return; + + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + u8 ets_tc = dcbcfg->etscfg.prio_table[i]; + + /* Get the mapped netdev TC# for the UP */ + netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; + netdev_set_prio_tc_map(netdev, i, netdev_tc); + } +} + +/** + * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured, + * @ctxt: VSI context structure + * @ena_tc: number of traffic classes to enable + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. + */ +static void +ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, + u8 ena_tc) +{ + u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; + u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; + int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; + u8 netdev_tc = 0; + int i; + + vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1; + + pow = order_base_2(tc0_qcount); + qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); + + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) { + /* TC is not enabled */ + vsi->tc_cfg.tc_info[i].qoffset = 0; + vsi->tc_cfg.tc_info[i].qcount_rx = 1; + vsi->tc_cfg.tc_info[i].qcount_tx = 1; + vsi->tc_cfg.tc_info[i].netdev_tc = 0; + ctxt->info.tc_mapping[i] = 0; + continue; + } + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + vsi->tc_cfg.tc_info[i].qoffset = offset; + vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; + vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx; + vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; + } + + if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) { + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_txq = offset + qcount_tx; + vsi->num_rxq = offset + qcount_rx; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); + ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + if (tc0_qcount && tc0_qcount < vsi->num_rxq) { + vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount; + vsi->next_base_q = tc0_qcount; + } + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq); + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq); + dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n", + vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc); +} + /** * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map * @vsi: VSI to be configured @@ -3388,6 +3657,9 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) u8 num_tc = 0; dev = ice_pf_to_dev(pf); + if (vsi->tc_cfg.ena_tc == ena_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) + return ret; ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ @@ -3395,6 +3667,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) num_tc++; /* populate max_txqs per TC */ max_txqs[i] = vsi->alloc_txq; + /* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are + * zero for CHNL VSI, hence use num_txq instead as max_txqs + */ + if (vsi->type == ICE_VSI_CHNL && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + max_txqs[i] = vsi->num_txq; } vsi->tc_cfg.ena_tc = ena_tc; @@ -3407,7 +3685,11 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ctx->vf_num = 0; ctx->info = vsi->info; - ice_vsi_setup_q_map(vsi, ctx); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc); + else + ice_vsi_setup_q_map(vsi, ctx); /* must to indicate which section of VSI context are being modified */ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); @@ -3418,8 +3700,13 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) goto out; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, + max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); if (status) { dev_err(dev, "VSI %d failed TC config, error %s\n", @@ -3435,7 +3722,6 @@ out: kfree(ctx); return ret; } -#endif /* CONFIG_DCB */ /** * ice_update_ring_stats - Update ring statistics diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index c79fcbf82d8f..b44ceffe40bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -51,13 +51,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); int ice_set_link(struct ice_vsi *vsi, bool ena); -#ifdef CONFIG_DCB +void ice_vsi_delete(struct ice_vsi *vsi); +int ice_vsi_clear(struct ice_vsi *vsi); + int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); -#endif /* CONFIG_DCB */ + +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi); + +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id); + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch); void ice_napi_del(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 846623a97723..20c912842ad0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3240,7 +3240,7 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); } /** @@ -3254,7 +3254,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); } /** @@ -3268,7 +3268,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 560e52b99f83..fe36c3b5eb4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -2998,6 +2998,43 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) } } +/** + * ice_sched_save_vsi_bw - save VSI node's BW information + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save BW information of VSI type node for post replay use. + */ +static int +ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + struct ice_vsi_ctx *vsi_ctx; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return -EINVAL; + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + return -EINVAL; + switch (rl_type) { + case ICE_MIN_BW: + ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_MAX_BW: + ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_SHARED_BW: + ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + default: + return -EINVAL; + } + return 0; +} + /** * ice_sched_calc_wakeup - calculate RL profile wakeup parameter * @hw: pointer to the HW struct @@ -3875,9 +3912,17 @@ enum ice_status ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_rl_type rl_type, u32 bw) { - return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, - ICE_AGG_TYPE_VSI, - tc, rl_type, bw); + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw); + mutex_unlock(&pi->sched_lock); + } + return status; } /** @@ -3894,10 +3939,19 @@ enum ice_status ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_rl_type rl_type) { - return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, - ICE_AGG_TYPE_VSI, - tc, rl_type, - ICE_SCHED_DFLT_BW); + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, + ICE_SCHED_DFLT_BW); + mutex_unlock(&pi->sched_lock); + } + return status; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index f89b80ba3499..551e13e46653 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -58,6 +58,8 @@ struct ice_sched_agg_info { DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); u32 agg_id; enum ice_agg_type agg_type; + /* bw_t_info saves aggregator BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; /* save aggregator TC bitmap */ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS); }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c759a02bfce4..c56dd1749903 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -287,6 +287,7 @@ struct ice_rx_ring { struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ + struct ice_channel *ch; struct bpf_prog *xdp_prog; struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; @@ -328,6 +329,7 @@ struct ice_tx_ring { /* CL3 - 3rd cacheline starts here */ struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ + struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ @@ -352,6 +354,11 @@ static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; } +static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) +{ + return !!ring->ch; +} + static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) { return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index d5cb1c5a89c0..9e0c2923c62e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -138,6 +138,7 @@ enum ice_vsi_type { ICE_VSI_PF = 0, ICE_VSI_VF = 1, ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ + ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, ICE_VSI_SWITCHDEV_CTRL = 7, }; @@ -570,6 +571,8 @@ struct ice_sched_vsi_info { struct list_head list_entry; u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS]; + /* bw_t_info saves VSI BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; }; /* driver defines the policy */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 8e3f9ec4e35b..a42eaf6f942e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -832,7 +832,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -859,7 +859,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); From fbc7b27af0f9fb181811424e29caf6825594a841 Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:16 -0700 Subject: [PATCH 391/440] ice: enable ndo_setup_tc support for mqprio_qdisc Add support in driver for TC_QDISC_SETUP_MQPRIO. This support enables instantiation of channels in HW using existing MQPRIO infrastructure which is extended to be offloadable. This provides a mechanism to configure dedicated set of queues for each TC. Configuring channels using "tc mqprio": -------------------------------------- tc qdisc add dev root mqprio num_tc 3 map 0 1 2 \ queues 4@0 4@4 4@8 hw 1 mode channel Above command configures 3 TCs having 4 queues each. "hw 1 mode channel" implies offload of channel configuration to HW. When driver processes configuration received via "ndo_setup_tc: QDISC_SETUP_MQPRIO", each TC maps to HW VSI with specified queues. User can optionally specify bandwidth min and max rate limit per TC (see example below). If shaper params like min and/or max bandwidth rate limit are specified, driver configures VSI specific rate limiter in HW. Configuring channels and bandwidth shaper parameters using "tc mqprio": ---------------------------------------------------------------- tc qdisc add dev root mqprio \ num_tc 4 map 0 1 2 3 queues 4@0 4@4 4@8 4@12 hw 1 mode channel \ shaper bw_rlimit min_rate 1Gbit 2Gbit 3Gbit 4Gbit \ max_rate 4Gbit 5Gbit 6Gbit 7Gbit Command to view configured TCs: ----------------------------- tc qdisc show dev Deleting TCs: ------------ tc qdisc del dev root mqprio Signed-off-by: Kiran Patil Signed-off-by: Amritha Nambiar Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 21 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 + drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 889 ++++++++++++++++++- 5 files changed, 913 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index c2814e31c438..4f94ecff87e8 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -56,6 +56,7 @@ #include "ice_dcb.h" #include "ice_switch.h" #include "ice_common.h" +#include "ice_flow.h" #include "ice_sched.h" #include "ice_idc_int.h" #include "ice_virtchnl_pf.h" @@ -126,6 +127,13 @@ #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) #define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i])) +/* Minimum BW limit is 500 Kbps for any scheduler node */ +#define ICE_MIN_BW_LIMIT 500 +/* User can specify BW in either Kbit/Mbit/Gbit and OS converts it in bytes. + * use it to convert user specified BW limit into Kbps + */ +#define ICE_BW_KBPS_DIVISOR 125 + /* Macro for each VSI in a PF */ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) @@ -440,6 +448,8 @@ struct ice_q_vector { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; + struct ice_channel *ch; + char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ @@ -595,6 +605,17 @@ struct ice_netdev_priv { struct ice_repr *repr; }; +/** + * ice_vector_ch_enabled + * @qv: pointer to q_vector, can be NULL + * + * This function returns true if vector is channel enabled otherwise false + */ +static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv) +{ + return !!qv->ch; /* Enable it to run with TC */ +} + /** * ice_irq_dynamic_ena - Enable default interrupt generation settings * @hw: pointer to HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8b3eef6632e9..cfe96a127ed4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3194,6 +3194,11 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return -EIO; } + if (ice_is_adq_active(pf)) { + netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n"); + return -EOPNOTSUPP; + } + if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = @@ -3404,6 +3409,11 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U)) return -EINVAL; + if (ice_is_adq_active(pf)) { + netdev_err(dev, "Cannot set channels with ADQ configured.\n"); + return -EOPNOTSUPP; + } + if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) { netdev_err(dev, "Cannot set channels when Flow Director filters are active\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 62adbb39670b..4904ae088daa 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3954,7 +3954,7 @@ int ice_get_link_speed_mbps(struct ice_vsi *vsi) * * Return current VSI link speed and 0 if the speed is unknown. */ -static int ice_get_link_speed_kbps(struct ice_vsi *vsi) +int ice_get_link_speed_kbps(struct ice_vsi *vsi) { int speed_mbps; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b44ceffe40bf..e7f4ecbb8549 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -124,6 +124,7 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_clear_dflt_vsi(struct ice_sw *sw); int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_kbps(struct ice_vsi *vsi); int ice_get_link_speed_mbps(struct ice_vsi *vsi); int ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 20c912842ad0..ff2d000bbbab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -55,6 +55,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); static void ice_vsi_release_all(struct ice_pf *pf); +static int ice_rebuild_channels(struct ice_pf *pf); +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr); + bool netif_is_ice(struct net_device *dev) { return dev && (dev->netdev_ops == &ice_netdev_ops); @@ -106,7 +109,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; - if (tx_ring && tx_ring->desc) { + if (!tx_ring) + continue; + if (ice_ring_ch_enabled(tx_ring)) + continue; + + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -458,17 +466,21 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_prepare_for_reset - prep for the core to reset + * ice_prepare_for_reset - prep for reset * @pf: board private structure + * @reset_type: reset type requested * * Inform or close all dependent features in prep for reset. */ static void -ice_prepare_for_reset(struct ice_pf *pf) +ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; + struct ice_vsi *vsi; unsigned int i; + dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); + /* already prepared for reset */ if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; @@ -483,6 +495,38 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_for_each_vf(pf, i) ice_set_vf_state_qs_dis(&pf->vf[i]); + /* release ADQ specific HW and SW resources */ + vsi = ice_get_main_vsi(pf); + if (!vsi) + goto skip; + + /* to be on safe side, reset orig_rss_size so that normal flow + * of deciding rss_size can take precedence + */ + vsi->orig_rss_size = 0; + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + if (reset_type == ICE_RESET_PFR) { + vsi->old_ena_tc = vsi->all_enatc; + vsi->old_numtc = vsi->all_numtc; + } else { + ice_remove_q_channels(vsi, true); + + /* for other reset type, do not support channel rebuild + * hence reset needed info + */ + vsi->old_ena_tc = 0; + vsi->all_enatc = 0; + vsi->old_numtc = 0; + vsi->all_numtc = 0; + vsi->req_txq = 0; + vsi->req_rxq = 0; + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); + } + } +skip: + /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ @@ -502,8 +546,7 @@ ice_prepare_for_reset(struct ice_pf *pf) /** * ice_do_reset - Initiate one of many types of resets * @pf: board private structure - * @reset_type: reset type requested - * before this function was called. + * @reset_type: reset type requested before this function was called. */ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { @@ -512,7 +555,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* trigger the reset */ if (ice_reset(hw, reset_type)) { @@ -570,7 +613,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) return; - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { @@ -3243,6 +3286,13 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); } +static struct ice_vsi * +ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, + struct ice_channel *ch) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); +} + /** * ice_ctrl_vsi_setup - Set up a control VSI * @pf: board private structure @@ -3361,6 +3411,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf) if (!vsi) return -ENOMEM; + /* init channel list */ + INIT_LIST_HEAD(&vsi->ch_list); + status = ice_cfg_netdev(vsi); if (status) { status = -ENODEV; @@ -4956,7 +5009,7 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } @@ -5048,7 +5101,7 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } } @@ -5453,6 +5506,11 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) ice_clear_arfs(vsi); } + /* don't turn off hw_tc_offload when ADQ is already enabled */ + if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); + return -EACCES; + } return ret; } @@ -6307,6 +6365,9 @@ static void ice_vsi_release_all(struct ice_pf *pf) if (!pf->vsi[i]) continue; + if (pf->vsi[i]->type == ICE_VSI_CHNL) + continue; + err = ice_vsi_release(pf->vsi[i]); if (err) dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", @@ -6517,6 +6578,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } + if (reset_type == ICE_RESET_PFR) { + err = ice_rebuild_channels(pf); + if (err) { + dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n", + err); + goto err_vsi_rebuild; + } + } + /* If Flow Director is active */ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL); @@ -7183,6 +7253,799 @@ ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) } } +/** + * ice_validate_mqprio_qopt - Validate TCF input parameters + * @vsi: Pointer to VSI + * @mqprio_qopt: input parameters for mqprio queue configuration + * + * This function validates MQPRIO params, such as qcount (power of 2 wherever + * needed), and make sure user doesn't specify qcount and BW rate limit + * for TCs, which are more than "num_tc" + */ +static int +ice_validate_mqprio_qopt(struct ice_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 sum_max_rate = 0, sum_min_rate = 0; + int non_power_of_2_qcount = 0; + struct ice_pf *pf = vsi->back; + int max_rss_q_cnt = 0; + struct device *dev; + int i, speed; + u8 num_tc; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + vsi->ch_rss_size = 0; + num_tc = mqprio_qopt->qopt.num_tc; + + for (i = 0; num_tc; i++) { + int qcount = mqprio_qopt->qopt.count[i]; + u64 max_rate, min_rate, rem; + + if (!qcount) + return -EINVAL; + + if (is_power_of_2(qcount)) { + if (non_power_of_2_qcount && + qcount > non_power_of_2_qcount) { + dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount > max_rss_q_cnt) + max_rss_q_cnt = qcount; + } else { + if (non_power_of_2_qcount && + qcount != non_power_of_2_qcount) { + dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount < max_rss_q_cnt) { + dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n", + qcount, max_rss_q_cnt); + return -EINVAL; + } + max_rss_q_cnt = qcount; + non_power_of_2_qcount = qcount; + } + + /* TC command takes input in K/N/Gbps or K/M/Gbit etc but + * converts the bandwidth rate limit into Bytes/s when + * passing it down to the driver. So convert input bandwidth + * from Bytes/s to Kbps + */ + max_rate = mqprio_qopt->max_rate[i]; + max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); + sum_max_rate += max_rate; + + /* min_rate is minimum guaranteed rate and it can't be zero */ + min_rate = mqprio_qopt->min_rate[i]; + min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR); + sum_min_rate += min_rate; + + if (min_rate && min_rate < ICE_MIN_BW_LIMIT) { + dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i, + min_rate, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + /* min_rate can't be more than max_rate, except when max_rate + * is zero (implies max_rate sought is max line rate). In such + * a case min_rate can be more than max. + */ + if (max_rate && min_rate > max_rate) { + dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n", + min_rate, max_rate); + return -EINVAL; + } + + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + qcount)) + return -EINVAL; + } + if (vsi->num_rxq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + if (vsi->num_txq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + + speed = ice_get_link_speed_kbps(vsi); + if (sum_max_rate && sum_max_rate > (u64)speed) { + dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", + sum_max_rate, speed); + return -EINVAL; + } + if (sum_min_rate && sum_min_rate > (u64)speed) { + dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", + sum_min_rate, speed); + return -EINVAL; + } + + /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */ + vsi->ch_rss_size = max_rss_q_cnt; + + return 0; +} + +/** + * ice_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @sw_id: underlying HW switching element ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + */ +static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *vsi; + + if (ch->type != ICE_VSI_CHNL) { + dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type); + return -EINVAL; + } + + vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch); + if (!vsi || vsi->type != ICE_VSI_CHNL) { + dev_err(dev, "create chnl VSI failure\n"); + return -EINVAL; + } + + ch->sw_id = sw_id; + ch->vsi_num = vsi->vsi_num; + ch->info.mapping_flags = vsi->info.mapping_flags; + ch->ch_vsi = vsi; + /* set the back pointer of channel for newly created VSI */ + vsi->ch = ch; + + memcpy(&ch->info.q_mapping, &vsi->info.q_mapping, + sizeof(vsi->info.q_mapping)); + memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping, + sizeof(vsi->info.tc_mapping)); + + return 0; +} + +/** + * ice_chnl_cfg_res + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure channel specific resources such as rings, vector. + */ +static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + int i; + + for (i = 0; i < ch->num_txq; i++) { + struct ice_q_vector *tx_q_vector, *rx_q_vector; + struct ice_ring_container *rc; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (!tx_ring || !rx_ring) + continue; + + /* setup ring being channel enabled */ + tx_ring->ch = ch; + rx_ring->ch = ch; + + /* following code block sets up vector specific attributes */ + tx_q_vector = tx_ring->q_vector; + rx_q_vector = rx_ring->q_vector; + if (!tx_q_vector && !rx_q_vector) + continue; + + if (tx_q_vector) { + tx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &tx_q_vector->tx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + if (rx_q_vector) { + rx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &rx_q_vector->rx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + } + + /* it is safe to assume that, if channel has non-zero num_t[r]xq, then + * GLINT_ITR register would have written to perform in-context + * update, hence perform flush + */ + if (ch->num_txq || ch->num_rxq) + ice_flush(&vsi->back->hw); +} + +/** + * ice_cfg_chnl_all_res - configure channel resources + * @vsi: pte to main_vsi + * @ch: ptr to channel structure + * + * This function configures channel specific resources such as flow-director + * counter index, and other resources such as queues, vectors, ITR settings + */ +static void +ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + /* configure channel (aka ADQ) resources such as queues, vectors, + * ITR settings for channel specific vectors and anything else + */ + ice_chnl_cfg_res(vsi, ch); +} + +/** + * ice_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @sw_id: underlying HW switching element ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures Tx rings accordingly + */ +static int +ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch, u16 sw_id, u8 type) +{ + struct device *dev = ice_pf_to_dev(pf); + int ret; + + ch->base_q = vsi->next_base_q; + ch->type = type; + + ret = ice_add_channel(pf, sw_id, ch); + if (ret) { + dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id); + return ret; + } + + /* configure/setup ADQ specific resources */ + ice_cfg_chnl_all_res(vsi, ch); + + /* make sure to update the next_base_q so that subsequent channel's + * (aka ADQ) VSI queue map is correct + */ + vsi->next_base_q = vsi->next_base_q + ch->num_rxq; + dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num, + ch->num_rxq); + + return 0; +} + +/** + * ice_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element + */ +static bool +ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + u16 sw_id; + int ret; + + if (vsi->type != ICE_VSI_PF) { + dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type); + return false; + } + + sw_id = pf->first_sw->sw_id; + + /* create channel (VSI) */ + ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL); + if (ret) { + dev_err(dev, "failed to setup hw_channel\n"); + return false; + } + dev_dbg(dev, "successfully created channel()\n"); + + return ch->ch_vsi ? true : false; +} + +/** + * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit + * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit + */ +static int +ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate) +{ + int err; + + err = ice_set_min_bw_limit(vsi, min_tx_rate); + if (err) + return err; + + return ice_set_max_bw_limit(vsi, max_tx_rate); +} + +/** + * ice_create_q_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + */ +static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + + if (!ch) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + if (!ch->num_txq || !ch->num_rxq) { + dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq); + return -EINVAL; + } + + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) { + dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_txq); + return -EINVAL; + } + + if (!ice_setup_channel(pf, vsi, ch)) { + dev_info(dev, "Failed to setup channel\n"); + return -EINVAL; + } + /* configure BW rate limit */ + if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) { + int ret; + + ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (ret) + dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + else + dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + } + + vsi->cnt_q_avail -= ch->num_txq; + + return 0; +} + +/** + * ice_remove_q_channels - Remove queue channels for the TCs + * @vsi: VSI to be configured + * @rem_fltr: delete advanced switch filter or not + * + * Remove queue channels for the TCs + */ +static void ice_remove_q_channels(struct ice_vsi *vsi, bool __maybe_unused rem_fltr) +{ + struct ice_channel *ch, *ch_tmp; + int i; + + /* perform cleanup for channels if they exist */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + list_del(&ch->list); + ch_vsi = ch->ch_vsi; + if (!ch_vsi) { + kfree(ch); + continue; + } + + /* Reset queue contexts */ + for (i = 0; i < ch->num_rxq; i++) { + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (tx_ring) { + tx_ring->ch = NULL; + if (tx_ring->q_vector) + tx_ring->q_vector->ch = NULL; + } + if (rx_ring) { + rx_ring->ch = NULL; + if (rx_ring->q_vector) + rx_ring->q_vector->ch = NULL; + } + } + + /* clear the VSI from scheduler tree */ + ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx); + + /* Delete VSI from FW */ + ice_vsi_delete(ch->ch_vsi); + + /* Delete VSI from PF and HW VSI arrays */ + ice_vsi_clear(ch->ch_vsi); + + /* free the channel */ + kfree(ch); + } + + /* clear the channel VSI map which is stored in main VSI */ + ice_for_each_chnl_tc(i) + vsi->tc_map_vsi[i] = NULL; + + /* reset main VSI's all TC information */ + vsi->all_enatc = 0; + vsi->all_numtc = 0; +} + +/** + * ice_rebuild_channels - rebuild channel + * @pf: ptr to PF + * + * Recreate channel VSIs and replay filters + */ +static int ice_rebuild_channels(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *main_vsi; + bool rem_adv_fltr = true; + struct ice_channel *ch; + struct ice_vsi *vsi; + int tc_idx = 1; + int i, err; + + main_vsi = ice_get_main_vsi(pf); + if (!main_vsi) + return 0; + + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) || + main_vsi->old_numtc == 1) + return 0; /* nothing to be done */ + + /* reconfigure main VSI based on old value of TC and cached values + * for MQPRIO opts + */ + err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc); + if (err) { + dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n", + main_vsi->old_ena_tc, main_vsi->vsi_num); + return err; + } + + /* rebuild ADQ VSIs */ + ice_for_each_vsi(pf, i) { + enum ice_vsi_type type; + + vsi = pf->vsi[i]; + if (!vsi || vsi->type != ICE_VSI_CHNL) + continue; + + type = vsi->type; + + /* rebuild ADQ VSI */ + err = ice_vsi_rebuild(vsi, true); + if (err) { + dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n", + ice_vsi_type_str(type), vsi->idx, err); + goto cleanup; + } + + /* Re-map HW VSI number, using VSI handle that has been + * previously validated in ice_replay_vsi() call above + */ + vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); + + /* replay filters for the VSI */ + err = ice_replay_vsi(&pf->hw, vsi->idx); + if (err) { + dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n", + ice_vsi_type_str(type), err, vsi->idx); + rem_adv_fltr = false; + goto cleanup; + } + dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n", + ice_vsi_type_str(type), vsi->idx); + + /* store ADQ VSI at correct TC index in main VSI's + * map of TC to VSI + */ + main_vsi->tc_map_vsi[tc_idx++] = vsi; + } + + /* ADQ VSI(s) has been rebuilt successfully, so setup + * channel for main VSI's Tx and Rx rings + */ + list_for_each_entry(ch, &main_vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + ch_vsi = ch->ch_vsi; + if (!ch_vsi) + continue; + + /* reconfig channel resources */ + ice_cfg_chnl_all_res(main_vsi, ch); + + /* replay BW rate limit if it is non-zero */ + if (!ch->max_tx_rate && !ch->min_tx_rate) + continue; + + err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (err) + dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", + err, ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + else + dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + } + + /* reconfig RSS for main VSI */ + if (main_vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(main_vsi); + + return 0; + +cleanup: + ice_remove_q_channels(main_vsi, rem_adv_fltr); + return err; +} + +/** + * ice_create_q_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + */ +static int ice_create_q_channels(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_channel *ch; + int ret = 0, i; + + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_rxq = vsi->mqprio_qopt.qopt.count[i]; + ch->num_txq = vsi->mqprio_qopt.qopt.count[i]; + ch->base_q = vsi->mqprio_qopt.qopt.offset[i]; + ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i]; + ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i]; + + /* convert to Kbits/s */ + if (ch->max_tx_rate) + ch->max_tx_rate = div_u64(ch->max_tx_rate, + ICE_BW_KBPS_DIVISOR); + if (ch->min_tx_rate) + ch->min_tx_rate = div_u64(ch->min_tx_rate, + ICE_BW_KBPS_DIVISOR); + + ret = ice_create_q_channel(vsi, ch); + if (ret) { + dev_err(ice_pf_to_dev(pf), + "failed creating channel TC:%d\n", i); + kfree(ch); + goto err_free; + } + list_add_tail(&ch->list, &vsi->ch_list); + vsi->tc_map_vsi[i] = ch->ch_vsi; + dev_dbg(ice_pf_to_dev(pf), + "successfully created channel: VSI %pK\n", ch->ch_vsi); + } + return 0; + +err_free: + ice_remove_q_channels(vsi, false); + + return ret; +} + +/** + * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes + * @netdev: net device to configure + * @type_data: TC offload data + */ +static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u16 mode, ena_tc_qdisc = 0; + int cur_txq, cur_rxq; + u8 hw = 0, num_tcf; + struct device *dev; + int ret, i; + + dev = ice_pf_to_dev(pf); + num_tcf = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + vsi->ch_rss_size = 0; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tcf; + } + + /* Generate queue region map for number of TCF requested */ + for (i = 0; i < num_tcf; i++) + ena_tc_qdisc |= BIT(i); + + switch (mode) { + case TC_MQPRIO_MODE_CHANNEL: + + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); + if (ret) { + netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", + ret); + return ret; + } + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + set_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + break; + default: + return -EINVAL; + } + +config_tcf: + + /* Requesting same TCF configuration as already enabled */ + if (ena_tc_qdisc == vsi->tc_cfg.ena_tc && + mode != TC_MQPRIO_MODE_CHANNEL) + return 0; + + /* Pause VSI queues */ + ice_dis_vsi(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_remove_q_channels(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf), + num_online_cpus()); + vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf), + num_online_cpus()); + } else { + /* logic to rebuild VSI, same like ethtool -L */ + u16 offset = 0, qcount_tx = 0, qcount_rx = 0; + + for (i = 0; i < num_tcf; i++) { + if (!(ena_tc_qdisc & BIT(i))) + continue; + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + vsi->req_txq = offset + qcount_tx; + vsi->req_rxq = offset + qcount_rx; + + /* store away original rss_size info, so that it gets reused + * form ice_vsi_rebuild during tc-qdisc delete stage - to + * determine, what should be the rss_sizefor main VSI + */ + vsi->orig_rss_size = vsi->rss_size; + } + + /* save current values of Tx and Rx queues before calling VSI rebuild + * for fallback option + */ + cur_txq = vsi->num_txq; + cur_rxq = vsi->num_rxq; + + /* proceed with rebuild main VSI using correct number of queues */ + ret = ice_vsi_rebuild(vsi, false); + if (ret) { + /* fallback to current number of queues */ + dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n"); + vsi->req_txq = cur_txq; + vsi->req_rxq = cur_rxq; + clear_bit(ICE_RESET_FAILED, pf->state); + if (ice_vsi_rebuild(vsi, false)) { + dev_err(dev, "Rebuild of main VSI failed again\n"); + return ret; + } + } + + vsi->all_numtc = num_tcf; + vsi->all_enatc = ena_tc_qdisc; + ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc); + if (ret) { + netdev_err(netdev, "failed configuring TC for VSI id=%d\n", + vsi->vsi_num); + goto exit; + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0]; + + /* set TC0 rate limit if specified */ + if (max_tx_rate || min_tx_rate) { + /* convert to Kbits/s */ + if (max_tx_rate) + max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR); + if (min_tx_rate) + min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR); + + ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate); + if (!ret) { + dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + } else { + dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + goto exit; + } + } + ret = ice_create_q_channels(vsi); + if (ret) { + netdev_err(netdev, "failed configuring queue channels\n"); + goto exit; + } else { + netdev_dbg(netdev, "successfully configured channels\n"); + } + } + + if (vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(vsi); + +exit: + /* if error, reset the all_numtc and all_enatc */ + if (ret) { + vsi->all_numtc = 0; + vsi->all_enatc = 0; + } + /* resume VSI */ + ice_ena_vsi(vsi, true); + + return ret; +} + static LIST_HEAD(ice_block_cb_list); static int @@ -7190,6 +8053,8 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + int err; switch (type) { case TC_SETUP_BLOCK: @@ -7197,6 +8062,12 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, &ice_block_cb_list, ice_setup_tc_block_cb, np, np, true); + case TC_SETUP_QDISC_MQPRIO: + /* setup traffic classifier for receive side */ + mutex_lock(&pf->tc_mutex); + err = ice_setup_tc_mqprio_qdisc(netdev, type_data); + mutex_unlock(&pf->tc_mutex); + return err; default: return -EOPNOTSUPP; } From 9fea749856d14c4713a2f5dee6f692aeaa2700b9 Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:17 -0700 Subject: [PATCH 392/440] ice: Add tc-flower filter support for channel Add support to add/delete channel specific filter using tc-flower. For now, only supported action is "skip_sw hw_tc " Filter criteria is specific to channel and it can be combination of L3, L3+L4, L2+L4. Example: MATCH criteria Action --------------------------- src and/or dest IPv4[6]/mask -> Forward to "hw_tc " dest IPv4[6]/mask + dest L4 port -> Forward to "hw_tc " dest MAC + dest L4 port -> Forward to "hw_tc " src IPv4[6]/mask + src L4 port -> Forward to "hw_tc " src MAC + src L4 port -> Forward to "hw_tc " Adding tc-flower filter for channel using "hw_tc" ------------------------------------------------- tc qdisc add dev clsact Above two steps are only needed the first time when adding tc-flower filter. tc filter add dev protocol ip ingress prio 1 flower \ dst_ip 192.168.0.1/32 ip_proto tcp dst_port 5001 \ skip_sw hw_tc 1 tc filter show dev ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 hw_tc 1 eth_type ipv4 ip_proto tcp dst_ip 192.168.0.1 dst_port 5001 skip_sw in_hw in_hw_count 1 Delete specific filter: ------------------------- tc filter del dev ingress pref 1 handle 0x1 flower Delete All filters: ------------------ tc filter del dev ingress Co-developed-by: Amritha Nambiar Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 9 +- drivers/net/ethernet/intel/ice/ice_main.c | 77 ++++++- drivers/net/ethernet/intel/ice/ice_switch.c | 119 +++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 217 +++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 22 ++ 6 files changed, 438 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4f94ecff87e8..967a90efcb11 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -402,6 +405,7 @@ struct ice_vsi { u16 num_chnl_rxq; u16 num_chnl_txq; u16 ch_rss_size; + u16 num_chnl_fltr; /* store away rss size info before configuring ADQ channels so that, * it can be used after tc-qdisc delete, to get back RSS setting as * they were before @@ -581,7 +585,10 @@ struct ice_pf { struct auxiliary_device *adev; int aux_idx; u32 sw_int_count; - + /* count of tc_flower filters specific to channel (aka where filter + * action is "hw_tc ") + */ + u16 num_dmac_chnl_fltrs; struct hlist_head tc_flower_fltr_list; __le64 nvm_phy_type_lo; /* NVM PHY type low */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ff2d000bbbab..cb82abd08a40 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5255,6 +5255,12 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } + if (ice_chnl_dmac_fltr_cnt(pf)) { + netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n", + mac); + return -EAGAIN; + } + netif_addr_lock_bh(netdev); ether_addr_copy(old_mac, netdev->dev_addr); /* change the netdev's MAC address */ @@ -5511,6 +5517,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); return -EACCES; } + + if ((features & NETIF_F_HW_TC) && + !(netdev->features & NETIF_F_HW_TC)) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + else + clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + return ret; } @@ -7649,6 +7662,57 @@ static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) return 0; } +/** + * ice_rem_all_chnl_fltrs - removes all channel filters + * @pf: ptr to PF, TC-flower based filter are tracked at PF level + * + * Remove all advanced switch filters only if they are channel specific + * tc-flower based filter + */ +static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + struct hlist_node *node; + + /* to remove all channel filters, iterate an ordered list of filters */ + hlist_for_each_entry_safe(fltr, node, + &pf->tc_flower_fltr_list, + tc_flower_node) { + struct ice_rule_query_data rule; + int status; + + /* for now process only channel specific filters */ + if (!ice_is_chnl_fltr(fltr)) + continue; + + rule.rid = fltr->rid; + rule.rule_id = fltr->rule_id; + rule.vsi_handle = fltr->dest_id; + status = ice_rem_adv_rule_by_id(&pf->hw, &rule); + if (status) { + if (status == -ENOENT) + dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n", + rule.rule_id); + else + dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n", + status); + } else if (fltr->dest_vsi) { + /* update advanced switch filter count */ + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + u32 flags = fltr->flags; + + fltr->dest_vsi->num_chnl_fltr--; + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC)) + pf->num_dmac_chnl_fltrs--; + } + } + + hlist_del(&fltr->tc_flower_node); + kfree(fltr); + } +} + /** * ice_remove_q_channels - Remove queue channels for the TCs * @vsi: VSI to be configured @@ -7656,11 +7720,16 @@ static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) * * Remove queue channels for the TCs */ -static void ice_remove_q_channels(struct ice_vsi *vsi, bool __maybe_unused rem_fltr) +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr) { struct ice_channel *ch, *ch_tmp; + struct ice_pf *pf = vsi->back; int i; + /* remove all tc-flower based filter if they are channel filters only */ + if (rem_fltr) + ice_rem_all_chnl_fltrs(pf); + /* perform cleanup for channels if they exist */ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { struct ice_vsi *ch_vsi; @@ -7926,6 +7995,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) } memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); set_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + /* don't assume state of hw_tc_offload during driver load + * and set the flag for TC flower filter if hw_tc_offload + * already ON + */ + if (vsi->netdev->features & NETIF_F_HW_TC) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); break; default: return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index a4a299012f9f..2742e1c1e337 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2272,6 +2272,125 @@ exit: return status; } +/** + * ice_mac_fltr_exist - does this MAC filter exist for given VSI + * @hw: pointer to the hardware structure + * @mac: MAC address to be checked (for MAC filter) + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; + + if (is_zero_ether_addr(mac_addr)) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_MAC || + f_info->fltr_act != ICE_FWD_TO_VSI || + hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + + if (ether_addr_equal(mac, mac_addr)) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + return false; +} + +/** + * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI + * @hw: pointer to the hardware structure + * @vlan_id: VLAN ID + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (vlan_id > ICE_MAX_VLAN_ID) + return false; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u16 entry_vlan_id = f_info->l_data.vlan.vlan_id; + struct ice_vsi_list_map_info *map_info; + + if (entry_vlan_id > ICE_MAX_VLAN_ID) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_VLAN) + continue; + + /* Only allowed filter action are FWD_TO_VSI/_VSI_LIST */ + if (f_info->fltr_act != ICE_FWD_TO_VSI && + f_info->fltr_act != ICE_FWD_TO_VSI_LIST) + continue; + + if (f_info->fltr_act == ICE_FWD_TO_VSI) { + if (hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + } else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { + /* If filter_action is FWD_TO_VSI_LIST, make sure + * that VSI being checked is part of VSI list + */ + if (entry->vsi_count == 1 && + entry->vsi_list_info) { + map_info = entry->vsi_list_info; + if (!test_bit(vsi_handle, map_info->vsi_map)) + continue; + } + } + + if (vlan_id == entry_vlan_id) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + + return false; +} + /** * ice_add_mac - Add a MAC address based filter rule * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index d4c0a3b594af..c4dd2062c469 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -335,6 +335,8 @@ enum ice_status ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list); int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable); +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle); +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 1dccfd116bc9..725caa160b13 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -303,6 +303,136 @@ exit: return ret; } +/** + * ice_add_tc_flower_adv_fltr - add appropriate filter rules + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to TC flower filter structure + * + * based on filter parameters using Advance recipes supported + * by OS package. + */ +static int +ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + struct ice_adv_rule_info rule_info = {0}; + struct ice_rule_query_data rule_added; + struct ice_adv_lkup_elem *list; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 flags = tc_fltr->flags; + struct ice_vsi *ch_vsi; + struct device *dev; + u16 lkups_cnt = 0; + u16 l4_proto = 0; + int ret = 0; + u16 i = 0; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode"); + return -EOPNOTSUPP; + } + + if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } + + /* get the channel (aka ADQ VSI) */ + if (tc_fltr->dest_vsi) + ch_vsi = tc_fltr->dest_vsi; + else + ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class]; + + lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); + list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + i = ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto); + if (i != lkups_cnt) { + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act; + if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) { + if (!ch_vsi) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist"); + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + rule_info.sw_act.vsi_handle = ch_vsi->idx; + rule_info.priority = 7; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", + tc_fltr->action.tc_class, + rule_info.sw_act.vsi_handle, lkups_cnt); + } else { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.rx = false; + } + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = tc_fltr->cookie; + + ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (ret == -EEXIST) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter because it already exist"); + ret = -EINVAL; + goto exit; + } else if (ret) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter due to error"); + ret = -EIO; + goto exit; + } + + /* store the output params, which are needed later for removing + * advanced switch filter + */ + tc_fltr->rid = rule_added.rid; + tc_fltr->rule_id = rule_added.rule_id; + if (tc_fltr->action.tc_class > 0 && ch_vsi) { + /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and + * for PF ADQ filter, it is not yet set in tc_fltr, + * hence store the dest_vsi ptr in tc_fltr + */ + if (ch_vsi->type == ICE_VSI_CHNL) + tc_fltr->dest_vsi = ch_vsi; + /* keep track of advanced switch filter for + * destination VSI (channel VSI) + */ + ch_vsi->num_chnl_fltr++; + /* in this case, dest_id is VSI handle (sw handle) */ + tc_fltr->dest_id = rule_added.vsi_handle; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs++; + } + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n", + lkups_cnt, flags, + tc_fltr->action.tc_class, rule_added.rid, + rule_added.rule_id, rule_added.vsi_handle); +exit: + kfree(list); + return ret; +} + /** * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter * @match: Pointer to flow match structure @@ -561,10 +691,13 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, static int ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { + if (fltr->action.fltr_act == ICE_FWD_TO_QGRP) + return -EOPNOTSUPP; + if (ice_is_eswitch_mode_switchdev(vsi->back)) return ice_eswitch_add_tc_fltr(vsi, fltr); - return -EOPNOTSUPP; + return ice_add_tc_flower_adv_fltr(vsi, fltr); } /** @@ -581,6 +714,7 @@ ice_handle_tclass_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + struct ice_vsi *main_vsi; if (tc < 0) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); @@ -591,13 +725,69 @@ ice_handle_tclass_action(struct ice_vsi *vsi, return -EINVAL; } - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) { + if (!(vsi->all_enatc & BIT(tc))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); return -EINVAL; } /* Redirect to a TC class or Queue Group */ - fltr->action.fltr_act = ICE_FWD_TO_QGRP; + main_vsi = ice_get_main_vsi(vsi->back); + if (!main_vsi || !main_vsi->netdev) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of invalid netdevice"); + return -EINVAL; + } + + if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_SRC_MAC))) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because filter using tunnel key and inner MAC is unsupported combination"); + return -EOPNOTSUPP; + } + + /* For ADQ, filter must include dest MAC address, otherwise unwanted + * packets with unrelated MAC address get delivered to ADQ VSIs as long + * as remaining filter criteria is satisfied such as dest IP address + * and dest/src L4 port. Following code is trying to handle: + * 1. For non-tunnel, if user specify MAC addresses, use them (means + * this code won't do anything + * 2. For non-tunnel, if user didn't specify MAC address, add implicit + * dest MAC to be lower netdev's active unicast MAC address + */ + if (!(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC)) { + ether_addr_copy(fltr->outer_headers.l2_key.dst_mac, + main_vsi->netdev->dev_addr); + eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac); + fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC; + } + + /* validate specified dest MAC address, make sure either it belongs to + * lower netdev or any of MACVLAN. MACVLANs MAC address are added as + * unicast MAC filter destined to main VSI. + */ + if (!ice_mac_fltr_exist(&main_vsi->back->hw, + fltr->outer_headers.l2_key.dst_mac, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy MAC filter for specified destination doesn't exist"); + return -EINVAL; + } + + /* Make sure VLAN is already added to main VSI, before allowing ADQ to + * add a VLAN based filter such as MAC + VLAN + L4 port. + */ + if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) { + u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id); + + if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy VLAN filter for specified destination doesn't exist"); + return -EINVAL; + } + } + fltr->action.fltr_act = ICE_FWD_TO_VSI; fltr->action.tc_class = tc; return 0; @@ -639,8 +829,8 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, /* Drop action */ if (act->id == FLOW_ACTION_DROP) { - fltr->action.fltr_act = ICE_DROP_PACKET; - return 0; + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP"); + return -EINVAL; } fltr->action.fltr_act = ICE_FWD_TO_VSI; } @@ -673,6 +863,20 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) return -EIO; } + /* update advanced switch filter count for destination + * VSI if filter destination was VSI + */ + if (fltr->dest_vsi) { + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + fltr->dest_vsi->num_chnl_fltr--; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs--; + } + } return 0; } @@ -811,7 +1015,8 @@ ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower) /* find filter */ fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); if (!fltr) { - if (hlist_empty(&pf->tc_flower_fltr_list)) + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) && + hlist_empty(&pf->tc_flower_fltr_list)) return 0; NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it"); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index d90e9e37ae25..ee9b284fcc02 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -120,6 +120,28 @@ struct ice_tc_flower_fltr { struct netlink_ext_ack *extack; }; +/** + * ice_is_chnl_fltr - is this a valid channel filter + * @f: Pointer to tc-flower filter + * + * Criteria to determine of given filter is valid channel filter + * or not is based on its "destination". If destination is hw_tc (aka tc_class) + * and it is non-zero, then it is valid channel (aka ADQ) filter + */ +static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f) +{ + return !!f->action.tc_class; +} + +/** + * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count + * @pf: Pointer to PF + */ +static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) +{ + return pf->num_dmac_chnl_fltrs; +} + int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); From 62a22bcbd30ed01a0f0331288aa0cca9d9aae23b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:58:48 +0300 Subject: [PATCH 393/440] net: mscc: ocelot: add a type definition for REW_TAG_CFG_TAG_CFG This is a cosmetic patch which clarifies what are the port tagging options for Ocelot switches. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 13 +++++-------- include/soc/mscc/ocelot.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 520a75b57866..b09929970273 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -166,7 +166,7 @@ static void ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, struct ocelot_vlan native_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - u32 val = 0; + enum ocelot_port_tag_config tag_cfg; ocelot_port->native_vlan = native_vlan; @@ -176,16 +176,13 @@ static void ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, if (ocelot_port->vlan_aware) { if (native_vlan.valid) - /* Tag all frames except when VID == DEFAULT_VLAN */ - val = REW_TAG_CFG_TAG_CFG(1); + tag_cfg = OCELOT_PORT_TAG_NATIVE; else - /* Tag all frames */ - val = REW_TAG_CFG_TAG_CFG(3); + tag_cfg = OCELOT_PORT_TAG_TRUNK; } else { - /* Port tagging disabled. */ - val = REW_TAG_CFG_TAG_CFG(0); + tag_cfg = OCELOT_PORT_TAG_DISABLED; } - ocelot_rmw_gix(ocelot, val, + ocelot_rmw_gix(ocelot, REW_TAG_CFG_TAG_CFG(tag_cfg), REW_TAG_CFG_TAG_CFG_M, REW_TAG_CFG, port); } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index d7055b41982d..0568b25c8659 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -568,6 +568,17 @@ struct ocelot_vlan { u16 vid; }; +enum ocelot_port_tag_config { + /* all VLANs are egress-untagged */ + OCELOT_PORT_TAG_DISABLED = 0, + /* all VLANs except the native VLAN and VID 0 are egress-tagged */ + OCELOT_PORT_TAG_NATIVE = 1, + /* all VLANs except VID 0 are egress-tagged */ + OCELOT_PORT_TAG_TRUNK_NO_VID0 = 2, + /* all VLANs are egress-tagged */ + OCELOT_PORT_TAG_TRUNK = 3, +}; + enum ocelot_sb { OCELOT_SB_BUF, OCELOT_SB_REF, From 90e0aa8d108d22cb0dd646083fb3dfcc7a43a018 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:58:49 +0300 Subject: [PATCH 394/440] net: mscc: ocelot: convert the VLAN masks to a list First and foremost, the driver currently allocates a constant sized 4K * u32 (16KB memory) array for the VLAN masks. However, a typical application might not need so many VLANs, so if we dynamically allocate the memory as needed, we might actually save some space. Secondly, we'll need to keep more advanced bookkeeping of the VLANs we have, notably we'll have to check how many untagged and how many tagged VLANs we have. This will have to stay in a structure, and allocating another 16 KB array for that is again a bit too much. So refactor the bridge VLANs in a linked list of structures. The hook points inside the driver are ocelot_vlan_member_add() and ocelot_vlan_member_del(), which previously used to operate on the ocelot->vlan_mask[vid] array element. ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call ocelot_vlan_member_set() to commit to the ocelot->vlan_mask. Additionally, we had two calls to ocelot_vlan_member_set() from outside those callers, and those were directly from ocelot_vlan_init(). Those calls do not set up bridging service VLANs, instead they: - clear the VLAN table on reset - set the port pvid to the value used by this driver for VLAN-unaware standalone port operation (VID 0) So now, when we have a structure which represents actual bridge VLANs, VID 0 doesn't belong in that structure, since it is not part of the bridging layer. So delete the middle man, ocelot_vlan_member_set(), and let ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes any data structure and writes directly to hardware, which is all that we need. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 81 ++++++++++++++++++++++++------ include/soc/mscc/ocelot.h | 9 +++- 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index b09929970273..c8c0b0f0dd59 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -219,31 +219,79 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, ANA_PORT_DROP_CFG, port); } -static int ocelot_vlan_member_set(struct ocelot *ocelot, u32 vlan_mask, u16 vid) +static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, + u16 vid) { - int err; + struct ocelot_bridge_vlan *vlan; - err = ocelot_vlant_set_mask(ocelot, vid, vlan_mask); - if (err) - return err; + list_for_each_entry(vlan, &ocelot->vlans, list) + if (vlan->vid == vid) + return vlan; - ocelot->vlan_mask[vid] = vlan_mask; - - return 0; + return NULL; } static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid) { - return ocelot_vlan_member_set(ocelot, - ocelot->vlan_mask[vid] | BIT(port), - vid); + struct ocelot_bridge_vlan *vlan = ocelot_bridge_vlan_find(ocelot, vid); + unsigned long portmask; + int err; + + if (vlan) { + portmask = vlan->portmask | BIT(port); + + err = ocelot_vlant_set_mask(ocelot, vid, portmask); + if (err) + return err; + + vlan->portmask = portmask; + + return 0; + } + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + portmask = BIT(port); + + err = ocelot_vlant_set_mask(ocelot, vid, portmask); + if (err) { + kfree(vlan); + return err; + } + + vlan->vid = vid; + vlan->portmask = portmask; + INIT_LIST_HEAD(&vlan->list); + list_add_tail(&vlan->list, &ocelot->vlans); + + return 0; } static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid) { - return ocelot_vlan_member_set(ocelot, - ocelot->vlan_mask[vid] & ~BIT(port), - vid); + struct ocelot_bridge_vlan *vlan = ocelot_bridge_vlan_find(ocelot, vid); + unsigned long portmask; + int err; + + if (!vlan) + return 0; + + portmask = vlan->portmask & ~BIT(port); + + err = ocelot_vlant_set_mask(ocelot, vid, portmask); + if (err) + return err; + + vlan->portmask = portmask; + if (vlan->portmask) + return 0; + + list_del(&vlan->list); + kfree(vlan); + + return 0; } int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, @@ -369,13 +417,13 @@ static void ocelot_vlan_init(struct ocelot *ocelot) /* Configure the port VLAN memberships */ for (vid = 1; vid < VLAN_N_VID; vid++) - ocelot_vlan_member_set(ocelot, 0, vid); + ocelot_vlant_set_mask(ocelot, vid, 0); /* Because VLAN filtering is enabled, we need VID 0 to get untagged * traffic. It is added automatically if 8021q module is loaded, but * we can't rely on it since module may be not loaded. */ - ocelot_vlan_member_set(ocelot, all_ports, 0); + ocelot_vlant_set_mask(ocelot, 0, all_ports); /* Set vlan ingress filter mask to all ports but the CPU port by * default. @@ -2127,6 +2175,7 @@ int ocelot_init(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->multicast); INIT_LIST_HEAD(&ocelot->pgids); + INIT_LIST_HEAD(&ocelot->vlans); ocelot_detect_features(ocelot); ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 0568b25c8659..9f2ea7995075 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -568,6 +568,12 @@ struct ocelot_vlan { u16 vid; }; +struct ocelot_bridge_vlan { + u16 vid; + unsigned long portmask; + struct list_head list; +}; + enum ocelot_port_tag_config { /* all VLANs are egress-untagged */ OCELOT_PORT_TAG_DISABLED = 0, @@ -646,8 +652,7 @@ struct ocelot { u8 base_mac[ETH_ALEN]; - /* Keep track of the vlan port masks */ - u32 vlan_mask[VLAN_N_VID]; + struct list_head vlans; /* Switches like VSC9959 have flooding per traffic class */ int num_flooding_pgids; From 0da1a1c4891188f456c7790940e47c8043bc7c9b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:58:50 +0300 Subject: [PATCH 395/440] net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged At present, the ocelot driver accepts a single egress-untagged bridge VLAN, meaning that this sequence of operations: ip link add br0 type bridge vlan_filtering 1 ip link set swp0 master br0 bridge vlan add dev swp0 vid 2 pvid untagged fails because the bridge automatically installs VID 1 as a pvid & untagged VLAN, and vid 2 would be the second untagged VLAN on this port. It is necessary to delete VID 1 before proceeding to add VID 2. This limitation comes from the fact that we operate the port tag, when it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode. The ocelot switches do not have full flexibility and can either have one single VID as egress-untagged, or all of them. There are use cases for having all VLANs as egress-untagged as well, and this patch adds support for that. The change rewrites ocelot_port_set_native_vlan() into a more generic ocelot_port_manage_port_tag() function. Because the software bridge's state, transmitted to us via switchdev, can become very complex, we don't attempt to track all possible state transitions, but instead take a more declarative approach and just make ocelot_port_manage_port_tag() figure out which more to operate in: - port is VLAN-unaware: the classified VLAN (internal, unrelated to the 802.1Q header) is not inserted into packets on egress - port is VLAN-aware: - port has tagged VLANs: -> port has no untagged VLAN: set up as pure trunk -> port has one untagged VLAN: set up as trunk port + native VLAN -> port has more than one untagged VLAN: this is an invalid config which is rejected by ocelot_vlan_prepare - port has no tagged VLANs -> set up as pure egress-untagged port We don't keep the number of tagged and untagged VLANs, we just count the structures we keep. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 146 ++++++++++++++++++++++------- include/soc/mscc/ocelot.h | 3 +- 2 files changed, 113 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c8c0b0f0dd59..bc033e62be97 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -162,29 +162,100 @@ static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask) return ocelot_vlant_wait_for_completion(ocelot); } -static void ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, - struct ocelot_vlan native_vlan) +static int ocelot_port_num_untagged_vlans(struct ocelot *ocelot, int port) +{ + struct ocelot_bridge_vlan *vlan; + int num_untagged = 0; + + list_for_each_entry(vlan, &ocelot->vlans, list) { + if (!(vlan->portmask & BIT(port))) + continue; + + if (vlan->untagged & BIT(port)) + num_untagged++; + } + + return num_untagged; +} + +static int ocelot_port_num_tagged_vlans(struct ocelot *ocelot, int port) +{ + struct ocelot_bridge_vlan *vlan; + int num_tagged = 0; + + list_for_each_entry(vlan, &ocelot->vlans, list) { + if (!(vlan->portmask & BIT(port))) + continue; + + if (!(vlan->untagged & BIT(port))) + num_tagged++; + } + + return num_tagged; +} + +/* We use native VLAN when we have to mix egress-tagged VLANs with exactly + * _one_ egress-untagged VLAN (_the_ native VLAN) + */ +static bool ocelot_port_uses_native_vlan(struct ocelot *ocelot, int port) +{ + return ocelot_port_num_tagged_vlans(ocelot, port) && + ocelot_port_num_untagged_vlans(ocelot, port) == 1; +} + +static struct ocelot_bridge_vlan * +ocelot_port_find_native_vlan(struct ocelot *ocelot, int port) +{ + struct ocelot_bridge_vlan *vlan; + + list_for_each_entry(vlan, &ocelot->vlans, list) + if (vlan->portmask & BIT(port) && vlan->untagged & BIT(port)) + return vlan; + + return NULL; +} + +/* Keep in sync REW_TAG_CFG_TAG_CFG and, if applicable, + * REW_PORT_VLAN_CFG_PORT_VID, with the bridge VLAN table and VLAN awareness + * state of the port. + */ +static void ocelot_port_manage_port_tag(struct ocelot *ocelot, int port) { struct ocelot_port *ocelot_port = ocelot->ports[port]; enum ocelot_port_tag_config tag_cfg; - - ocelot_port->native_vlan = native_vlan; - - ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(native_vlan.vid), - REW_PORT_VLAN_CFG_PORT_VID_M, - REW_PORT_VLAN_CFG, port); + bool uses_native_vlan = false; if (ocelot_port->vlan_aware) { - if (native_vlan.valid) + uses_native_vlan = ocelot_port_uses_native_vlan(ocelot, port); + + if (uses_native_vlan) tag_cfg = OCELOT_PORT_TAG_NATIVE; + else if (ocelot_port_num_untagged_vlans(ocelot, port)) + tag_cfg = OCELOT_PORT_TAG_DISABLED; else tag_cfg = OCELOT_PORT_TAG_TRUNK; } else { tag_cfg = OCELOT_PORT_TAG_DISABLED; } + ocelot_rmw_gix(ocelot, REW_TAG_CFG_TAG_CFG(tag_cfg), REW_TAG_CFG_TAG_CFG_M, REW_TAG_CFG, port); + + if (uses_native_vlan) { + struct ocelot_bridge_vlan *native_vlan; + + /* Not having a native VLAN is impossible, because + * ocelot_port_num_untagged_vlans has returned 1. + * So there is no use in checking for NULL here. + */ + native_vlan = ocelot_port_find_native_vlan(ocelot, port); + + ocelot_rmw_gix(ocelot, + REW_PORT_VLAN_CFG_PORT_VID(native_vlan->vid), + REW_PORT_VLAN_CFG_PORT_VID_M, + REW_PORT_VLAN_CFG, port); + } } /* Default vlan to clasify for untagged frames (may be zero) */ @@ -231,7 +302,8 @@ static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, return NULL; } -static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid) +static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid, + bool untagged) { struct ocelot_bridge_vlan *vlan = ocelot_bridge_vlan_find(ocelot, vid); unsigned long portmask; @@ -245,6 +317,14 @@ static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid) return err; vlan->portmask = portmask; + /* Bridge VLANs can be overwritten with a different + * egress-tagging setting, so make sure to override an untagged + * with a tagged VID if that's going on. + */ + if (untagged) + vlan->untagged |= BIT(port); + else + vlan->untagged &= ~BIT(port); return 0; } @@ -263,6 +343,8 @@ static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid) vlan->vid = vid; vlan->portmask = portmask; + if (untagged) + vlan->untagged = BIT(port); INIT_LIST_HEAD(&vlan->list); list_add_tail(&vlan->list, &ocelot->vlans); @@ -324,7 +406,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG, port); ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); - ocelot_port_set_native_vlan(ocelot, port, ocelot_port->native_vlan); + ocelot_port_manage_port_tag(ocelot, port); return 0; } @@ -333,14 +415,20 @@ EXPORT_SYMBOL(ocelot_port_vlan_filtering); int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged, struct netlink_ext_ack *extack) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; - - /* Deny changing the native VLAN, but always permit deleting it */ - if (untagged && ocelot_port->native_vlan.vid != vid && - ocelot_port->native_vlan.valid) { - NL_SET_ERR_MSG_MOD(extack, - "Port already has a native VLAN"); - return -EBUSY; + if (untagged) { + /* We are adding an egress-tagged VLAN */ + if (ocelot_port_uses_native_vlan(ocelot, port)) { + NL_SET_ERR_MSG_MOD(extack, + "Port with egress-tagged VLANs cannot have more than one egress-untagged (native) VLAN"); + return -EBUSY; + } + } else { + /* We are adding an egress-tagged VLAN */ + if (ocelot_port_num_untagged_vlans(ocelot, port) > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Port with more than one egress-untagged VLAN cannot have egress-tagged VLANs"); + return -EBUSY; + } } return 0; @@ -352,7 +440,7 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, { int err; - err = ocelot_vlan_member_add(ocelot, port, vid); + err = ocelot_vlan_member_add(ocelot, port, vid, untagged); if (err) return err; @@ -366,13 +454,7 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, } /* Untagged egress vlan clasification */ - if (untagged) { - struct ocelot_vlan native_vlan; - - native_vlan.vid = vid; - native_vlan.valid = true; - ocelot_port_set_native_vlan(ocelot, port, native_vlan); - } + ocelot_port_manage_port_tag(ocelot, port); return 0; } @@ -395,11 +477,7 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) } /* Egress */ - if (ocelot_port->native_vlan.vid == vid) { - struct ocelot_vlan native_vlan = {0}; - - ocelot_port_set_native_vlan(ocelot, port, native_vlan); - } + ocelot_port_manage_port_tag(ocelot, port); return 0; } @@ -1725,12 +1803,12 @@ void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_vlan pvid = {0}, native_vlan = {0}; + struct ocelot_vlan pvid = {0}; ocelot_port->bridge = NULL; ocelot_port_set_pvid(ocelot, port, pvid); - ocelot_port_set_native_vlan(ocelot, port, native_vlan); + ocelot_port_manage_port_tag(ocelot, port); ocelot_apply_bridge_fwd_mask(ocelot); } EXPORT_SYMBOL(ocelot_port_bridge_leave); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 9f2ea7995075..b8b1ac943b44 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -571,6 +571,7 @@ struct ocelot_vlan { struct ocelot_bridge_vlan { u16 vid; unsigned long portmask; + unsigned long untagged; struct list_head list; }; @@ -608,8 +609,6 @@ struct ocelot_port { bool vlan_aware; /* VLAN that untagged frames are classified to, on ingress */ struct ocelot_vlan pvid_vlan; - /* The VLAN ID that will be transmitted as untagged, on egress */ - struct ocelot_vlan native_vlan; unsigned int ptp_skbs_in_flight; u8 ptp_cmd; From bfbab3104413081907ad12dbec433f50fe3588cd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:58:51 +0300 Subject: [PATCH 396/440] net: mscc: ocelot: add the local station MAC addresses in VID 0 The ocelot switchdev driver does not include the CPU port in the list of flooding destinations for unknown traffic, instead that traffic is supposed to match FDB entries to reach the CPU. The addresses it installs are: (a) the station MAC address, in ocelot_probe_port() and later during runtime in ocelot_port_set_mac_address(). These are the VLAN-unaware addresses. The VLAN-aware addresses are in ocelot_vlan_vid_add(). (b) multicast addresses added with dev_mc_add() (not bridge host MDB entries) in ocelot_mc_sync() (c) multicast destination MAC addresses for MRP in ocelot_mrp_save_mac(), to make sure those are dropped (not forwarded) by the bridging service, just trapped to the CPU So we can see that the logic is slightly buggy ever since the initial commit a556c76adc05 ("net: mscc: Add initial Ocelot switch support"). This is because, when ocelot_probe_port() runs, the port pvid is 0. Then we join a VLAN-aware bridge, the pvid becomes 1, we call ocelot_port_set_mac_address(), this learns the new MAC address in VID 1 (also fails to forget the old one, since it thinks it's in VID 1, but that's not so important). Then when we leave the VLAN-aware bridge, outside world is unable to ping our new MAC address because it isn't learned in VID 0, the VLAN-unaware pvid. [ note: this is strictly based on static analysis, I don't have hardware to test. But there are also many more corner cases ] The basic idea is that we should have a separation of concerns, and the FDB entries used for standalone operation should be managed by the driver, and the FDB entries used by the bridging service should be managed by the bridge. So the standalone and VLAN-unaware bridge FDB entries should not follow the bridge PVID, because that will only be active when the bridge is VLAN-aware. So since the port pvid is coincidentally zero during probe time, just make those entries statically go to VID 0. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 11 ++++++----- drivers/net/ethernet/mscc/ocelot.h | 1 + drivers/net/ethernet/mscc/ocelot_mrp.c | 8 ++++---- drivers/net/ethernet/mscc/ocelot_net.c | 12 ++++++------ 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index bc033e62be97..30aa99a95005 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -268,7 +268,7 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, ocelot_port->pvid_vlan = pvid_vlan; if (!ocelot_port->vlan_aware) - pvid_vlan.vid = 0; + pvid_vlan.vid = OCELOT_VLAN_UNAWARE_PVID; ocelot_rmw_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(pvid_vlan.vid), @@ -501,7 +501,7 @@ static void ocelot_vlan_init(struct ocelot *ocelot) * traffic. It is added automatically if 8021q module is loaded, but * we can't rely on it since module may be not loaded. */ - ocelot_vlant_set_mask(ocelot, 0, all_ports); + ocelot_vlant_set_mask(ocelot, OCELOT_VLAN_UNAWARE_PVID, all_ports); /* Set vlan ingress filter mask to all ports but the CPU port by * default. @@ -2194,9 +2194,10 @@ static void ocelot_cpu_port_init(struct ocelot *ocelot) OCELOT_TAG_PREFIX_NONE); /* Configure the CPU port to be VLAN aware */ - ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) | - ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | - ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), + ocelot_write_gix(ocelot, + ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_VLAN_UNAWARE_PVID) | + ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | + ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), ANA_PORT_VLAN_CFG, cpu); } diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 1952d6a1b98a..e43da09b8f91 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -25,6 +25,7 @@ #include "ocelot_rew.h" #include "ocelot_qs.h" +#define OCELOT_VLAN_UNAWARE_PVID 0 #define OCELOT_BUFFER_CELL_SZ 60 #define OCELOT_STATS_CHECK_DELAY (2 * HZ) diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 4b0941f09f71..1fa58546abdc 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -116,16 +116,16 @@ static void ocelot_mrp_save_mac(struct ocelot *ocelot, struct ocelot_port *port) { ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac, - port->pvid_vlan.vid, ENTRYTYPE_LOCKED); + OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac, - port->pvid_vlan.vid, ENTRYTYPE_LOCKED); + OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); } static void ocelot_mrp_del_mac(struct ocelot *ocelot, struct ocelot_port *port) { - ocelot_mact_forget(ocelot, mrp_test_dmac, port->pvid_vlan.vid); - ocelot_mact_forget(ocelot, mrp_control_dmac, port->pvid_vlan.vid); + ocelot_mact_forget(ocelot, mrp_test_dmac, OCELOT_VLAN_UNAWARE_PVID); + ocelot_mact_forget(ocelot, mrp_control_dmac, OCELOT_VLAN_UNAWARE_PVID); } int ocelot_mrp_add(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index affa9649f490..e3fc4548f642 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -418,7 +418,7 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid) * with VLAN filtering feature. We need to keep it to receive * untagged traffic. */ - if (vid == 0) + if (vid == OCELOT_VLAN_UNAWARE_PVID) return 0; ret = ocelot_vlan_del(ocelot, port, vid); @@ -553,7 +553,7 @@ static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.forget.addr, addr); - w.forget.vid = ocelot_port->pvid_vlan.vid; + w.forget.vid = OCELOT_VLAN_UNAWARE_PVID; w.type = OCELOT_MACT_FORGET; return ocelot_enqueue_mact_action(ocelot, &w); @@ -567,7 +567,7 @@ static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.learn.addr, addr); - w.learn.vid = ocelot_port->pvid_vlan.vid; + w.learn.vid = OCELOT_VLAN_UNAWARE_PVID; w.learn.pgid = PGID_CPU; w.learn.entry_type = ENTRYTYPE_LOCKED; w.type = OCELOT_MACT_LEARN; @@ -602,9 +602,9 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p) /* Learn the new net device MAC address in the mac table. */ ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, - ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); + OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); /* Then forget the previous one. */ - ocelot_mact_forget(ocelot, dev->dev_addr, ocelot_port->pvid_vlan.vid); + ocelot_mact_forget(ocelot, dev->dev_addr, OCELOT_VLAN_UNAWARE_PVID); eth_hw_addr_set(dev, addr->sa_data); return 0; @@ -1707,7 +1707,7 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, eth_hw_addr_gen(dev, ocelot->base_mac, port); ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, - ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); + OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); ocelot_init_port(ocelot, port); From d4004422f6f9fa8e55c04482008c1c9f9edd2d19 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:58:52 +0300 Subject: [PATCH 397/440] net: mscc: ocelot: track the port pvid using a pointer Now that we have a list of struct ocelot_bridge_vlan entries, we can rewrite the pvid logic to simply point to one of those structures, instead of having a separate structure with a "bool valid". The NULL pointer will represent the lack of a bridge pvid (not to be confused with the lack of a hardware pvid on the port, that is present at all times). Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 31 ++++++++++++------------------ include/soc/mscc/ocelot.h | 7 +------ 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 30aa99a95005..4e5ae687d2e2 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -260,18 +260,19 @@ static void ocelot_port_manage_port_tag(struct ocelot *ocelot, int port) /* Default vlan to clasify for untagged frames (may be zero) */ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - struct ocelot_vlan pvid_vlan) + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + u16 pvid = OCELOT_VLAN_UNAWARE_PVID; u32 val = 0; ocelot_port->pvid_vlan = pvid_vlan; - if (!ocelot_port->vlan_aware) - pvid_vlan.vid = OCELOT_VLAN_UNAWARE_PVID; + if (ocelot_port->vlan_aware && pvid_vlan) + pvid = pvid_vlan->vid; ocelot_rmw_gix(ocelot, - ANA_PORT_VLAN_CFG_VLAN_VID(pvid_vlan.vid), + ANA_PORT_VLAN_CFG_VLAN_VID(pvid), ANA_PORT_VLAN_CFG_VLAN_VID_M, ANA_PORT_VLAN_CFG, port); @@ -280,7 +281,7 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. */ - if (!pvid_vlan.valid && ocelot_port->vlan_aware) + if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; @@ -445,13 +446,9 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) { - struct ocelot_vlan pvid_vlan; - - pvid_vlan.vid = vid; - pvid_vlan.valid = true; - ocelot_port_set_pvid(ocelot, port, pvid_vlan); - } + if (pvid) + ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -470,11 +467,8 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (ocelot_port->pvid_vlan.vid == vid) { - struct ocelot_vlan pvid_vlan = {0}; - - ocelot_port_set_pvid(ocelot, port, pvid_vlan); - } + if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid) + ocelot_port_set_pvid(ocelot, port, NULL); /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -1803,11 +1797,10 @@ void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_vlan pvid = {0}; ocelot_port->bridge = NULL; - ocelot_port_set_pvid(ocelot, port, pvid); + ocelot_port_set_pvid(ocelot, port, NULL); ocelot_port_manage_port_tag(ocelot, port); ocelot_apply_bridge_fwd_mask(ocelot); } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index b8b1ac943b44..9b872da0c246 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -563,11 +563,6 @@ struct ocelot_vcap_block { int pol_lpr; }; -struct ocelot_vlan { - bool valid; - u16 vid; -}; - struct ocelot_bridge_vlan { u16 vid; unsigned long portmask; @@ -608,7 +603,7 @@ struct ocelot_port { bool vlan_aware; /* VLAN that untagged frames are classified to, on ingress */ - struct ocelot_vlan pvid_vlan; + const struct ocelot_bridge_vlan *pvid_vlan; unsigned int ptp_skbs_in_flight; u8 ptp_cmd; From 82b318983c515f29b8b3a0dad9f6a5fe8a68a7f4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:49 +0300 Subject: [PATCH 398/440] net: dsa: introduce helpers for iterating through ports using dp Since the DSA conversion from the ds->ports array into the dst->ports list, the DSA API has encouraged driver writers, as well as the core itself, to write inefficient code. Currently, code that wants to filter by a specific type of port when iterating, like {!unused, user, cpu, dsa}, uses the dsa_is_*_port helper. Under the hood, this uses dsa_to_port which iterates again through dst->ports. But the driver iterates through the port list already, so the complexity is quadratic for the typical case of a single-switch tree. This patch introduces some iteration helpers where the iterator is already a struct dsa_port *dp, so that the other variant of the filtering functions, dsa_port_is_{unused,user,cpu_dsa}, can be used directly on the iterator. This eliminates the second lookup. These functions can be used both by the core and by drivers. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 05ebdd8d5321..440b6aca22c7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -474,6 +474,34 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } +#define dsa_tree_for_each_user_port(_dp, _dst) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if (dsa_port_is_user((_dp))) + +#define dsa_switch_for_each_port(_dp, _ds) \ + list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_port_safe(_dp, _next, _ds) \ + list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \ + list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds)) + +#define dsa_switch_for_each_available_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (!dsa_port_is_unused((_dp))) + +#define dsa_switch_for_each_user_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (dsa_port_is_user((_dp))) + +#define dsa_switch_for_each_cpu_port(_dp, _ds) \ + dsa_switch_for_each_port((_dp), (_ds)) \ + if (dsa_port_is_cpu((_dp))) + static inline u32 dsa_user_ports(struct dsa_switch *ds) { u32 mask = 0; From d0004a020bb50263de0e3e775c7b7c7a003e0e0c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:50 +0300 Subject: [PATCH 399/440] net: dsa: remove the "dsa_to_port in a loop" antipattern from the core Ever since Vivien's conversion of the ds->ports array into a dst->ports list, and the introduction of dsa_to_port, iterations through the ports of a switch became quadratic whenever dsa_to_port was needed. dsa_to_port can either be called directly, or indirectly through the dsa_is_{user,cpu,dsa,unused}_port helpers. Use the newly introduced dsa_switch_for_each_port() iteration macro that works with the iterator variable being a struct dsa_port *dp directly, and not an int i. It is an expensive variable to go from i to dp, but cheap to go from dp to i. This macro iterates through the entire ds->dst->ports list and filters by the ports belonging just to the switch provided as argument. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++---- net/dsa/dsa.c | 22 +++++++++++----------- net/dsa/dsa2.c | 13 ++++++------- net/dsa/port.c | 17 +++++++---------- net/dsa/slave.c | 2 +- net/dsa/switch.c | 40 +++++++++++++++++----------------------- 6 files changed, 45 insertions(+), 56 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 440b6aca22c7..1cd9c2461f0d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -504,12 +504,11 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) static inline u32 dsa_user_ports(struct dsa_switch *ds) { + struct dsa_port *dp; u32 mask = 0; - int p; - for (p = 0; p < ds->num_ports; p++) - if (dsa_is_user_port(ds, p)) - mask |= BIT(p); + dsa_switch_for_each_user_port(dp, ds) + mask |= BIT(dp->index); return mask; } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 41f36ad8b0ec..ea5169e671ae 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -280,23 +280,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP -static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +static bool dsa_port_is_initialized(const struct dsa_port *dp) { - const struct dsa_port *dp = dsa_to_port(ds, p); - return dp->type == DSA_PORT_TYPE_USER && dp->slave; } int dsa_switch_suspend(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; /* Suspend slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave); + ret = dsa_slave_suspend(dp->slave); if (ret) return ret; } @@ -310,7 +309,8 @@ EXPORT_SYMBOL_GPL(dsa_switch_suspend); int dsa_switch_resume(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; if (ds->ops->resume) ret = ds->ops->resume(ds); @@ -319,11 +319,11 @@ int dsa_switch_resume(struct dsa_switch *ds) return ret; /* Resume slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_resume(dsa_to_port(ds, i)->slave); + ret = dsa_slave_resume(dp->slave); if (ret) return ret; } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 691d27498b24..1c09182b3644 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -802,17 +802,16 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) { const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; struct dsa_switch_tree *dst = ds->dst; - int port, err; + struct dsa_port *cpu_dp; + int err; if (tag_ops->proto == dst->default_proto) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_cpu_port(ds, port)) - continue; - + dsa_switch_for_each_cpu_port(cpu_dp, ds) { rtnl_lock(); - err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + err = ds->ops->change_tag_protocol(ds, cpu_dp->index, + tag_ops->proto); rtnl_unlock(); if (err) { dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", @@ -1150,7 +1149,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, goto out_unlock; list_for_each_entry(dp, &dst->ports, list) { - if (!dsa_is_user_port(dp->ds, dp->index)) + if (!dsa_port_is_user(dp)) continue; if (dp->slave->flags & IFF_UP) diff --git a/net/dsa/port.c b/net/dsa/port.c index 616330a16d31..3b14c9b6a922 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -515,7 +515,8 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; - int err, i; + struct dsa_port *other_dp; + int err; /* VLAN awareness was off, so the question is "can we turn it on". * We may have had 8021q uppers, those need to go. Make sure we don't @@ -557,10 +558,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, * different ports of the same switch device and one of them has a * different setting than what is being requested. */ - for (i = 0; i < ds->num_ports; i++) { + dsa_switch_for_each_port(other_dp, ds) { struct net_device *other_bridge; - other_bridge = dsa_to_port(ds, i)->bridge_dev; + other_bridge = other_dp->bridge_dev; if (!other_bridge) continue; /* If it's the same bridge, it also has same @@ -607,20 +608,16 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, return err; if (ds->vlan_filtering_is_global) { - int port; + struct dsa_port *other_dp; ds->vlan_filtering = vlan_filtering; - for (port = 0; port < ds->num_ports; port++) { - struct net_device *slave; - - if (!dsa_is_user_port(ds, port)) - continue; + dsa_switch_for_each_user_port(other_dp, ds) { + struct net_device *slave = dp->slave; /* We might be called in the unbind path, so not * all slave devices might still be registered. */ - slave = dsa_to_port(ds, port)->slave; if (!slave) continue; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 499f8d18c76d..9d9fef668eba 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2368,7 +2368,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, dst = cpu_dp->ds->dst; list_for_each_entry(dp, &dst->ports, list) { - if (!dsa_is_user_port(dp->ds, dp->index)) + if (!dsa_port_is_user(dp)) continue; list_add(&dp->slave->close_list, &close_list); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 6466d0539af9..19651674c8c7 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -17,14 +17,11 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - int i; - - for (i = 0; i < ds->num_ports; ++i) { - struct dsa_port *dp = dsa_to_port(ds, i); + struct dsa_port *dp; + dsa_switch_for_each_port(dp, ds) if (dp->ageing_time && dp->ageing_time < ageing_time) ageing_time = dp->ageing_time; - } return ageing_time; } @@ -120,7 +117,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; - int err, port; + struct dsa_port *dp; + int err; if (dst->index == info->tree_index && ds->index == info->sw_index && ds->ops->port_bridge_leave) @@ -150,10 +148,10 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - for (port = 0; port < ds->num_ports; port++) { + dsa_switch_for_each_port(dp, ds) { struct net_device *bridge_dev; - bridge_dev = dsa_to_port(ds, port)->bridge_dev; + bridge_dev = dp->bridge_dev; if (bridge_dev && br_vlan_enabled(bridge_dev)) { change_vlan_filtering = false; @@ -579,38 +577,34 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, struct dsa_notifier_tag_proto_info *info) { const struct dsa_device_ops *tag_ops = info->tag_ops; - int port, err; + struct dsa_port *dp, *cpu_dp; + int err; if (!ds->ops->change_tag_protocol) return -EOPNOTSUPP; ASSERT_RTNL(); - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_cpu_port(ds, port)) - continue; - - err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + err = ds->ops->change_tag_protocol(ds, cpu_dp->index, + tag_ops->proto); if (err) return err; - dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops); + dsa_port_set_tag_protocol(cpu_dp, tag_ops); } /* Now that changing the tag protocol can no longer fail, let's update * the remaining bits which are "duplicated for faster access", and the * bits that depend on the tagger, such as the MTU. */ - for (port = 0; port < ds->num_ports; port++) { - if (dsa_is_user_port(ds, port)) { - struct net_device *slave; + dsa_switch_for_each_user_port(dp, ds) { + struct net_device *slave = dp->slave; - slave = dsa_to_port(ds, port)->slave; - dsa_slave_setup_tagger(slave); + dsa_slave_setup_tagger(slave); - /* rtnl_mutex is held in dsa_tree_change_tag_proto */ - dsa_slave_change_mtu(slave, slave->mtu); - } + /* rtnl_mutex is held in dsa_tree_change_tag_proto */ + dsa_slave_change_mtu(slave, slave->mtu); } return 0; From 65c563a67755194f56c461891075f5707b8df1ac Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:51 +0300 Subject: [PATCH 400/440] net: dsa: do not open-code dsa_switch_for_each_port Find the remaining iterators over dst->ports that only filter for the ports belonging to a certain switch, and replace those with the dsa_switch_for_each_port helper that we have now. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1c09182b3644..2a339fb09f4e 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -399,11 +399,8 @@ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) if (!dsa_port_is_cpu(cpu_dp)) continue; - list_for_each_entry(dp, &dst->ports, list) { - /* Prefer a local CPU port */ - if (dp->ds != cpu_dp->ds) - continue; - + /* Prefer a local CPU port */ + dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ if (dp->cpu_dp) continue; @@ -852,12 +849,10 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Setup devlink port instances now, so that the switch * setup() can register regions etc, against the ports */ - list_for_each_entry(dp, &ds->dst->ports, list) { - if (dp->ds == ds) { - err = dsa_port_devlink_setup(dp); - if (err) - goto unregister_devlink_ports; - } + dsa_switch_for_each_port(dp, ds) { + err = dsa_port_devlink_setup(dp); + if (err) + goto unregister_devlink_ports; } err = dsa_switch_register_notifier(ds); @@ -901,9 +896,8 @@ teardown: unregister_notifier: dsa_switch_unregister_notifier(ds); unregister_devlink_ports: - list_for_each_entry(dp, &ds->dst->ports, list) - if (dp->ds == ds) - dsa_port_devlink_teardown(dp); + dsa_switch_for_each_port(dp, ds) + dsa_port_devlink_teardown(dp); devlink_free(ds->devlink); ds->devlink = NULL; return err; @@ -931,9 +925,8 @@ static void dsa_switch_teardown(struct dsa_switch *ds) dsa_switch_unregister_notifier(ds); if (ds->devlink) { - list_for_each_entry(dp, &ds->dst->ports, list) - if (dp->ds == ds) - dsa_port_devlink_teardown(dp); + dsa_switch_for_each_port(dp, ds) + dsa_port_devlink_teardown(dp); devlink_free(ds->devlink); ds->devlink = NULL; } @@ -1180,8 +1173,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp; - list_for_each_entry(dp, &dst->ports, list) - if (dp->ds == ds && dp->index == index) + dsa_switch_for_each_port(dp, ds) + if (dp->index == index) return dp; dp = kzalloc(sizeof(*dp), GFP_KERNEL); @@ -1522,12 +1515,9 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { - struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp, *next; - list_for_each_entry_safe(dp, next, &dst->ports, list) { - if (dp->ds != ds) - continue; + dsa_switch_for_each_port_safe(dp, next, ds) { list_del(&dp->list); kfree(dp); } @@ -1619,13 +1609,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) mutex_lock(&dsa2_mutex); rtnl_lock(); - list_for_each_entry(dp, &ds->dst->ports, list) { - if (dp->ds != ds) - continue; - - if (!dsa_port_is_user(dp)) - continue; - + dsa_switch_for_each_user_port(dp, ds) { master = dp->cpu_dp->master; slave_dev = dp->slave; From 57d77986e7428752a05358386e3f80c2a08c13a7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:52 +0300 Subject: [PATCH 401/440] net: dsa: remove gratuitous use of dsa_is_{user,dsa,cpu}_port Find the occurrences of dsa_is_{user,dsa,cpu}_port where a struct dsa_port *dp was already available in the function scope, and replace them with the dsa_port_is_{user,dsa,cpu} equivalent function which uses that dp directly and does not perform another hidden dsa_to_port(). Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index 3b14c9b6a922..bf671306b560 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -523,7 +523,7 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, * enter an inconsistent state: deny changing the VLAN awareness state * as long as we have 8021q uppers. */ - if (vlan_filtering && dsa_is_user_port(ds, dp->index)) { + if (vlan_filtering && dsa_port_is_user(dp)) { struct net_device *upper_dev, *slave = dp->slave; struct net_device *br = dp->bridge_dev; struct list_head *iter; @@ -1038,7 +1038,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, struct phy_device *phydev = NULL; struct dsa_switch *ds = dp->ds; - if (dsa_is_user_port(ds, dp->index)) + if (dsa_port_is_user(dp)) phydev = dp->slave->phydev; if (!ds->ops->phylink_mac_link_down) { From fac6abd5f13297ddeae7ff6eb903e933cffc071b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:53 +0300 Subject: [PATCH 402/440] net: dsa: convert cross-chip notifiers to iterate using dp The majority of cross-chip switch notifiers need to filter in some way over the type of ports: some install VLANs etc on all cascade ports. The difference is that the matching function, which filters by port type, is separate from the function where the iteration happens. So this patch needs to refactor the matching functions' prototypes as well, to take the dp as argument. In a future patch/series, I might convert dsa_towards_port to return a struct dsa_port *dp too, but at the moment it is a bit entangled with dsa_routing_port which is also used by mv88e6xxx and they both return an int port. So keep dsa_towards_port the way it is and convert it into a dp using dsa_to_port. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/switch.c | 129 +++++++++++++++++++++++--------------------- net/dsa/tag_8021q.c | 85 ++++++++++++++--------------- 2 files changed, 112 insertions(+), 102 deletions(-) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 19651674c8c7..2b1b21bde830 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -46,10 +46,10 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, return 0; } -static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mtu_info *info) +static bool dsa_port_mtu_match(struct dsa_port *dp, + struct dsa_notifier_mtu_info *info) { - if (ds->index == info->sw_index && port == info->port) + if (dp->ds->index == info->sw_index && dp->index == info->port) return true; /* Do not propagate to other switches in the tree if the notifier was @@ -58,7 +58,7 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, if (info->targeted_match) return false; - if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) return true; return false; @@ -67,14 +67,16 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, static int dsa_switch_mtu(struct dsa_switch *ds, struct dsa_notifier_mtu_info *info) { - int port, ret; + struct dsa_port *dp; + int ret; if (!ds->ops->port_change_mtu) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mtu_match(ds, port, info)) { - ret = ds->ops->port_change_mtu(ds, port, info->mtu); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_mtu_match(dp, info)) { + ret = ds->ops->port_change_mtu(ds, dp->index, + info->mtu); if (ret) return ret; } @@ -177,19 +179,19 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * DSA links) that sit between the targeted port on which the notifier was * emitted and its dedicated CPU port. */ -static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port, - int info_sw_index, int info_port) +static bool dsa_port_host_address_match(struct dsa_port *dp, + int info_sw_index, int info_port) { struct dsa_port *targeted_dp, *cpu_dp; struct dsa_switch *targeted_ds; - targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index); + targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index); targeted_dp = dsa_to_port(targeted_ds, info_port); cpu_dp = targeted_dp->cpu_dp; - if (dsa_switch_is_upstream_of(ds, targeted_ds)) - return port == dsa_towards_port(ds, cpu_dp->ds->index, - cpu_dp->index); + if (dsa_switch_is_upstream_of(dp->ds, targeted_ds)) + return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index, + cpu_dp->index); return false; } @@ -207,11 +209,12 @@ static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list, return NULL; } -static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_do_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -242,11 +245,12 @@ static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_do_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -272,11 +276,12 @@ static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) +static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -307,11 +312,12 @@ static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) +static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -340,17 +346,16 @@ static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port, static int dsa_switch_host_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_fdb_add(ds, port, info->addr, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_fdb_add(dp, info->addr, info->vid); if (err) break; } @@ -362,17 +367,16 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, static int dsa_switch_host_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_fdb_del(ds, port, info->addr, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_fdb_del(dp, info->addr, info->vid); if (err) break; } @@ -385,22 +389,24 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_add(dp, info->addr, info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_del(dp, info->addr, info->vid); } static int dsa_switch_hsr_join(struct dsa_switch *ds, @@ -466,37 +472,39 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - return dsa_switch_do_mdb_add(ds, port, info->mdb); + return dsa_port_do_mdb_add(dp, info->mdb); } static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - return dsa_switch_do_mdb_del(ds, port, info->mdb); + return dsa_port_do_mdb_del(dp, info->mdb); } static int dsa_switch_host_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_mdb_add(ds, port, info->mdb); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_mdb_add(dp, info->mdb); if (err) break; } @@ -508,16 +516,16 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds, static int dsa_switch_host_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_mdb_del(ds, port, info->mdb); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_mdb_del(dp, info->mdb); if (err) break; } @@ -526,13 +534,13 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, return err; } -static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, - struct dsa_notifier_vlan_info *info) +static bool dsa_port_vlan_match(struct dsa_port *dp, + struct dsa_notifier_vlan_info *info) { - if (ds->index == info->sw_index && port == info->port) + if (dp->ds->index == info->sw_index && dp->index == info->port) return true; - if (dsa_is_dsa_port(ds, port)) + if (dsa_port_is_dsa(dp)) return true; return false; @@ -541,14 +549,15 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_vlan_match(ds, port, info)) { - err = ds->ops->port_vlan_add(ds, port, info->vlan, + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_vlan_match(dp, info)) { + err = ds->ops->port_vlan_add(ds, dp->index, info->vlan, info->extack); if (err) return err; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 935d0264ebd8..8f4e0af2f74f 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -138,12 +138,13 @@ dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) return NULL; } -static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, - u16 vid, u16 flags) +static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, + u16 flags) { - struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; struct dsa_tag_8021q_vlan *v; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -174,12 +175,12 @@ static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, - u16 vid) +static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) { - struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; struct dsa_tag_8021q_vlan *v; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -206,14 +207,16 @@ static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, } static bool -dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, - struct dsa_notifier_tag_8021q_vlan_info *info) +dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, + struct dsa_notifier_tag_8021q_vlan_info *info) { - if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + struct dsa_switch *ds = dp->ds; + + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) return true; if (ds->dst->index == info->tree_index && ds->index == info->sw_index) - return port == info->port; + return dp->index == info->port; return false; } @@ -221,7 +224,8 @@ dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; /* Since we use dsa_broadcast(), there might be other switches in other * trees which don't support tag_8021q, so don't return an error. @@ -231,21 +235,20 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { u16 flags = 0; - if (dsa_is_user_port(ds, port)) + if (dsa_port_is_user(dp)) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_is_dsa_8021q_rxvlan(info->vid) && dsa_8021q_rx_switch_id(info->vid) == ds->index && - dsa_8021q_rx_source_port(info->vid) == port) + dsa_8021q_rx_source_port(info->vid) == dp->index) flags |= BRIDGE_VLAN_INFO_PVID; - err = dsa_switch_do_tag_8021q_vlan_add(ds, port, - info->vid, - flags); + err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid, + flags); if (err) return err; } @@ -257,15 +260,15 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { - err = dsa_switch_do_tag_8021q_vlan_del(ds, port, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { + err = dsa_port_do_tag_8021q_vlan_del(dp, info->vid); if (err) return err; } @@ -321,15 +324,14 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port, - struct dsa_notifier_bridge_info *info) +static bool +dsa_port_tag_8021q_bridge_match(struct dsa_port *dp, + struct dsa_notifier_bridge_info *info) { - struct dsa_port *dp = dsa_to_port(ds, port); - /* Don't match on self */ - if (ds->dst->index == info->tree_index && - ds->index == info->sw_index && - port == info->port) + if (dp->ds->dst->index == info->tree_index && + dp->ds->index == info->sw_index && + dp->index == info->port) return false; if (dsa_port_is_user(dp)) @@ -343,8 +345,9 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; + struct dsa_port *dp; u16 targeted_rx_vid; - int err, port; + int err; if (!ds->tag_8021q_ctx) return 0; @@ -353,11 +356,10 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, targeted_dp = dsa_to_port(targeted_ds, info->port); targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); + dsa_switch_for_each_port(dp, ds) { + u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index); - if (!dsa_tag_8021q_bridge_match(ds, port, info)) + if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; /* Install the RX VID of the targeted port in our VLAN table */ @@ -379,8 +381,8 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; + struct dsa_port *dp; u16 targeted_rx_vid; - int port; if (!ds->tag_8021q_ctx) return 0; @@ -389,11 +391,10 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, targeted_dp = dsa_to_port(targeted_ds, info->port); targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); + dsa_switch_for_each_port(dp, ds) { + u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index); - if (!dsa_tag_8021q_bridge_match(ds, port, info)) + if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; /* Remove the RX VID of the targeted port from our VLAN table */ From 5068887a4fbe11ed239a8a409bbde82a3317882a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:54 +0300 Subject: [PATCH 403/440] net: dsa: tag_sja1105: do not open-code dsa_switch_for_each_port Find the remaining iterators over dst->ports that only filter for the ports belonging to a certain switch, and replace those with the dsa_switch_for_each_port helper that we have now. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 2edede9ddac9..8b2d458f72b3 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -158,10 +158,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp) * we're sure about that). It may not be on this port though, so we * need to find it. */ - list_for_each_entry(other_dp, &ds->dst->ports, list) { - if (other_dp->ds != ds) - continue; - + dsa_switch_for_each_port(other_dp, ds) { if (!other_dp->bridge_dev) continue; From 992e5cc7be8e693aba1f0ee7905d34c87fd7872a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:49:55 +0300 Subject: [PATCH 404/440] net: dsa: tag_8021q: make dsa_8021q_{rx,tx}_vid take dp as argument Pass a single argument to dsa_8021q_rx_vid and dsa_8021q_tx_vid that contains the necessary information from the two arguments that are currently provided: the switch and the port number. Also rename those functions so that they have a dsa_port_* prefix, since they operate on a struct dsa_port *. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 3 ++- include/linux/dsa/8021q.h | 5 +++-- net/dsa/tag_8021q.c | 32 ++++++++++++++-------------- net/dsa/tag_ocelot_8021q.c | 2 +- net/dsa/tag_sja1105.c | 4 ++-- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index d55572994e1f..f5dca6a9b0f9 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -394,7 +394,8 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv, vl_lookup[k].vlanid = rule->key.vl.vid; vl_lookup[k].vlanprior = rule->key.vl.pcp; } else { - u16 vid = dsa_8021q_rx_vid(priv->ds, port); + struct dsa_port *dp = dsa_to_port(priv->ds, port); + u16 vid = dsa_tag_8021q_rx_vid(dp); vl_lookup[k].vlanid = vid; vl_lookup[k].vlanprior = 0; diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index c7fa4a3498fe..254b165f2b44 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -9,6 +9,7 @@ #include struct dsa_switch; +struct dsa_port; struct sk_buff; struct net_device; @@ -45,9 +46,9 @@ void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num); -u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); +u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); -u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); +u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp); int dsa_8021q_rx_switch_id(u16 vid); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 8f4e0af2f74f..72cac2c0af7b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -77,22 +77,22 @@ EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ -u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) | + DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid); /* Returns the VID that will be installed as pvid for this switch port, sent as * tagged egress towards the CPU port and decoded by the rcv function. */ -u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) | + DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) @@ -354,10 +354,10 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; @@ -389,10 +389,10 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); dsa_switch_for_each_port(dp, ds) { - u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; @@ -433,8 +433,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ds, port); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct net_device *master; int err; @@ -478,8 +478,8 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ds, port); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct net_device *master; /* The CPU port is implicitly configured by diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 3412051981d7..a1919ea5e828 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -39,9 +39,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct ethhdr *hdr = eth_hdr(skb); if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 8b2d458f72b3..262c8833a910 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -235,9 +235,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); if (skb->offload_fwd_mark) return sja1105_imprecise_xmit(skb, netdev); @@ -263,9 +263,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); __be32 *tx_trailer; __be16 *tx_header; int trailer_pos; From c5c6e589a8c811c7f20024cc22209ae984fc2b98 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 11:59:19 +0200 Subject: [PATCH 405/440] net: stats: Read the statistics in ___gnet_stats_copy_basic() instead of adding. Since the rework, the statistics code always adds up the byte and packet value(s). On 32bit architectures a seqcount_t is used in gnet_stats_basic_sync to ensure that the 64bit values are not modified during the read since two 32bit loads are required. The usage of a seqcount_t requires a lock to ensure that only one writer is active at a time. This lock leads to disabled preemption during the update. The lack of disabling preemption is now creating a warning as reported by Naresh since the query done by gnet_stats_copy_basic() is in preemptible context. For ___gnet_stats_copy_basic() there is no need to disable preemption since the update is performed on stack and can't be modified by another writer. Instead of disabling preemption, to avoid the warning, simply create a read function to just read the values and return as u64. Reported-by: Naresh Kamboju Fixes: 67c9e6270f301 ("net: sched: Protect Qdisc::bstats with u64_stats") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/core/gen_stats.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 15c270e22c5e..a10335b4ba2d 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -171,20 +171,51 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, } EXPORT_SYMBOL(gnet_stats_add_basic); +static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running) +{ + unsigned int start; + + if (cpu) { + u64 t_bytes = 0, t_packets = 0; + int i; + + for_each_possible_cpu(i) { + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); + unsigned int start; + u64 bytes, packets; + + do { + start = u64_stats_fetch_begin_irq(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); + } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; + } + *ret_bytes = t_bytes; + *ret_packets = t_packets; + return; + } + do { + if (running) + start = u64_stats_fetch_begin_irq(&b->syncp); + *ret_bytes = u64_stats_read(&b->bytes); + *ret_packets = u64_stats_read(&b->packets); + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); +} + static int ___gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_sync __percpu *cpu, struct gnet_stats_basic_sync *b, int type, bool running) { - struct gnet_stats_basic_sync bstats; u64 bstats_bytes, bstats_packets; - gnet_stats_basic_sync_init(&bstats); - gnet_stats_add_basic(&bstats, cpu, b, running); - - bstats_bytes = u64_stats_read(&bstats.bytes); - bstats_packets = u64_stats_read(&bstats.packets); + gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { d->tc_stats.bytes = bstats_bytes; From 50af5969bb22bb8ea757f87934c2478715afa854 Mon Sep 17 00:00:00 2001 From: luo penghao Date: Thu, 21 Oct 2021 06:40:20 +0000 Subject: [PATCH 406/440] net/core: Remove unused assignment operations and variable Although if_info_size is assigned, it has not been used. And the variable should also be deleted. The clang_analyzer complains as follows: net/core/rtnetlink.c:3806: warning: Although the value stored to 'if_info_size' is used in the enclosing expression, the value is never actually read from 'if_info_size'. Reported-by: Zeal Robot Signed-off-by: luo penghao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 564d24c451af..79477dcae7c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3804,9 +3804,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); + skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; From eb3d6175e4a904f7e4b0216c52810ac6065877df Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Thu, 21 Oct 2021 16:30:36 +0200 Subject: [PATCH 407/440] mac80211: debugfs: calculate free buffer size correctly In breaking patch buf memory moved from stack to heap and sizeof(buf) change from size of actual memory to size of the pointer to the heap. Fix this by holding a separated variable for allocate size. Fixes: 01f84f0ed3b4 ("mac80211: reduce stack usage in debugfs") Signed-off-by: Mordechay Goodstein Link: https://lore.kernel.org/r/20211021163035.b9ae48c06e27.I6a6ed197110eae28cf4f6e38ce36828a7c136337@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 91 ++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index da22725eca0f..481f01b0f65c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -153,20 +153,20 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, rcu_read_lock(); p += scnprintf(p, - bufsz+buf-p, + bufsz + buf - p, "target %uus interval %uus ecn %s\n", codel_time_to_us(sta->cparams.target), codel_time_to_us(sta->cparams.interval), sta->cparams.ecn ? "yes" : "no"); p += scnprintf(p, - bufsz+buf-p, + bufsz + buf - p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { if (!sta->sta.txq[i]) continue; txqi = to_txq_info(sta->sta.txq[i]); - p += scnprintf(p, bufsz+buf-p, + p += scnprintf(p, bufsz + buf - p, "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n", txqi->txq.tid, txqi->txq.ac, @@ -315,22 +315,23 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { char *buf, *p; + ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; int i; struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; ssize_t ret; - buf = kzalloc(71 + IEEE80211_NUM_TIDS * 40, GFP_KERNEL); + buf = kzalloc(bufsz, GFP_KERNEL); if (!buf) return -ENOMEM; p = buf; rcu_read_lock(); - p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", + p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { @@ -340,21 +341,21 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); - p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", + p += scnprintf(p, bufsz + buf - p, "%02d", i); + p += scnprintf(p, bufsz + buf - p, "\t\t%x", tid_rx_valid); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", + p += scnprintf(p, bufsz + buf - p, "\t%#.2x", tid_rx_valid ? sta->ampdu_mlme.tid_rx_token[i] : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", + p += scnprintf(p, bufsz + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_tx); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", + p += scnprintf(p, bufsz + buf - p, "\t\t%x", !!tid_tx); + p += scnprintf(p, bufsz + buf - p, "\t%#.2x", tid_tx ? tid_tx->dialog_token : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", + p += scnprintf(p, bufsz + buf - p, "\t%03d", tid_tx ? skb_queue_len(&tid_tx->pending) : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\n"); + p += scnprintf(p, bufsz + buf - p, "\n"); } rcu_read_unlock(); @@ -444,19 +445,20 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, } while (0) char *buf, *p; int i; + ssize_t bufsz = 512; struct sta_info *sta = file->private_data; struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; ssize_t ret; - buf = kzalloc(512, GFP_KERNEL); + buf = kzalloc(bufsz, GFP_KERNEL); if (!buf) return -ENOMEM; p = buf; - p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n", + p += scnprintf(p, bufsz + buf - p, "ht %ssupported\n", htc->ht_supported ? "" : "not "); if (htc->ht_supported) { - p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap); + p += scnprintf(p, bufsz + buf - p, "cap: %#.4x\n", htc->cap); PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC"); PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40"); @@ -498,23 +500,23 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, PRINT_HT_CAP((htc->cap & BIT(15)), "L-SIG TXOP protection"); - p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", + p += scnprintf(p, bufsz + buf - p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); - p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); + p += scnprintf(p, bufsz + buf - p, "MCS mask:"); for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - p += scnprintf(p, sizeof(buf)+buf-p, " %.2x", + p += scnprintf(p, bufsz + buf - p, " %.2x", htc->mcs.rx_mask[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); + p += scnprintf(p, bufsz + buf - p, "\n"); /* If not set this is meaningless */ if (le16_to_cpu(htc->mcs.rx_highest)) { - p += scnprintf(p, sizeof(buf)+buf-p, + p += scnprintf(p, bufsz + buf - p, "MCS rx highest: %d Mbps\n", le16_to_cpu(htc->mcs.rx_highest)); } - p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n", + p += scnprintf(p, bufsz + buf - p, "MCS tx params: %x\n", htc->mcs.tx_params); } @@ -531,56 +533,57 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, struct sta_info *sta = file->private_data; struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; ssize_t ret; + ssize_t bufsz = 512; - buf = kzalloc(512, GFP_KERNEL); + buf = kzalloc(bufsz, GFP_KERNEL); if (!buf) return -ENOMEM; p = buf; - p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", + p += scnprintf(p, bufsz + buf - p, "VHT %ssupported\n", vhtc->vht_supported ? "" : "not "); if (vhtc->vht_supported) { - p += scnprintf(p, sizeof(buf) + buf - p, "cap: %#.8x\n", + p += scnprintf(p, bufsz + buf - p, "cap: %#.8x\n", vhtc->cap); #define PFLAG(a, b) \ do { \ if (vhtc->cap & IEEE80211_VHT_CAP_ ## a) \ - p += scnprintf(p, sizeof(buf) + buf - p, \ + p += scnprintf(p, bufsz + buf - p, \ "\t\t%s\n", b); \ } while (0) switch (vhtc->cap & 0x3) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-3895\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-7991\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-11454\n"); break; default: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-UNKNOWN\n"); } switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case 0: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\t80Mhz\n"); break; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\t160Mhz\n"); break; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\t80+80Mhz\n"); break; default: - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tUNKNOWN-MHZ: 0x%x\n", (vhtc->cap >> 2) & 0x3); } @@ -588,15 +591,15 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, PFLAG(SHORT_GI_80, "SHORT-GI-80"); PFLAG(SHORT_GI_160, "SHORT-GI-160"); PFLAG(TXSTBC, "TXSTBC"); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tRXSTBC_%d\n", (vhtc->cap >> 8) & 0x7); PFLAG(SU_BEAMFORMER_CAPABLE, "SU-BEAMFORMER-CAPABLE"); PFLAG(SU_BEAMFORMEE_CAPABLE, "SU-BEAMFORMEE-CAPABLE"); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tBEAMFORMEE-STS: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK) >> IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tSOUNDING-DIMENSIONS: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK) >> IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT); @@ -604,28 +607,28 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, PFLAG(MU_BEAMFORMEE_CAPABLE, "MU-BEAMFORMEE-CAPABLE"); PFLAG(VHT_TXOP_PS, "TXOP-PS"); PFLAG(HTC_VHT, "HTC-VHT"); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tMPDU-LENGTH-EXPONENT: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >> IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT); PFLAG(VHT_LINK_ADAPTATION_VHT_UNSOL_MFB, "LINK-ADAPTATION-VHT-UNSOL-MFB"); - p += scnprintf(p, sizeof(buf) + buf - p, + p += scnprintf(p, bufsz + buf - p, "\t\tLINK-ADAPTATION-VHT-MRQ-MFB: 0x%x\n", (vhtc->cap & IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB) >> 26); PFLAG(RX_ANTENNA_PATTERN, "RX-ANTENNA-PATTERN"); PFLAG(TX_ANTENNA_PATTERN, "TX-ANTENNA-PATTERN"); - p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n", + p += scnprintf(p, bufsz + buf - p, "RX MCS: %.4x\n", le16_to_cpu(vhtc->vht_mcs.rx_mcs_map)); if (vhtc->vht_mcs.rx_highest) - p += scnprintf(p, sizeof(buf)+buf-p, + p += scnprintf(p, bufsz + buf - p, "MCS RX highest: %d Mbps\n", le16_to_cpu(vhtc->vht_mcs.rx_highest)); - p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n", + p += scnprintf(p, bufsz + buf - p, "TX MCS: %.4x\n", le16_to_cpu(vhtc->vht_mcs.tx_mcs_map)); if (vhtc->vht_mcs.tx_highest) - p += scnprintf(p, sizeof(buf)+buf-p, + p += scnprintf(p, bufsz + buf - p, "MCS TX highest: %d Mbps\n", le16_to_cpu(vhtc->vht_mcs.tx_highest)); #undef PFLAG From e76219e675ebebebdb054bb57b0c6236c27755fe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 16:50:17 -0700 Subject: [PATCH 408/440] wireless: mac80211_hwsim: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20211018235021.1279697-12-kuba@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 2d09e998d643..dba10d5a9bfd 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3653,13 +3653,16 @@ static const struct net_device_ops hwsim_netdev_ops = { static void hwsim_mon_setup(struct net_device *dev) { + u8 addr[ETH_ALEN]; + dev->netdev_ops = &hwsim_netdev_ops; dev->needs_free_netdev = true; ether_setup(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->type = ARPHRD_IEEE80211_RADIOTAP; - eth_zero_addr(dev->dev_addr); - dev->dev_addr[0] = 0x12; + eth_zero_addr(addr); + addr[0] = 0x12; + eth_hw_addr_set(dev, addr); } static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr) From de1352ead8a8cb4367a19ac23d9deaaa23befc3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:28:15 -0700 Subject: [PATCH 409/440] mac80211: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Convert mac80211 from memcpy(... ETH_ADDR) to eth_hw_addr_set(): @@ expression dev, np; @@ - memcpy(dev->dev_addr, np, ETH_ALEN) + eth_hw_addr_set(dev, np) Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20211019162816.1384077-1-kuba@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 691b983b762e..9a2145c8192b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1137,9 +1137,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) * this interface, if it has the special null one. */ if (dev && is_zero_ether_addr(dev->dev_addr)) { - memcpy(dev->dev_addr, - local->hw.wiphy->perm_addr, - ETH_ALEN); + eth_hw_addr_set(dev, local->hw.wiphy->perm_addr); memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN); if (!is_valid_ether_addr(dev->dev_addr)) { @@ -1993,9 +1991,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, ndev->perm_addr, type); if (is_valid_ether_addr(params->macaddr)) - memcpy(ndev->dev_addr, params->macaddr, ETH_ALEN); + eth_hw_addr_set(ndev, params->macaddr); else - memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); + eth_hw_addr_set(ndev, ndev->perm_addr); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */ From 10de5a599f92ef776886073a33ca7bbbe6b14d37 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 09:28:16 -0700 Subject: [PATCH 410/440] cfg80211: prepare for const netdev->dev_addr netdev->dev_addr will be const soon. All callers of wdev_address() can take const already. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20211019162816.1384077-2-kuba@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e9e313aa991f..02737dde06d9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5558,7 +5558,7 @@ struct wireless_dev { unsigned long unprot_beacon_reported; }; -static inline u8 *wdev_address(struct wireless_dev *wdev) +static inline const u8 *wdev_address(struct wireless_dev *wdev) { if (wdev->netdev) return wdev->netdev->dev_addr; From 8223ac199a3849257e86ec27865dc63f034b1cf1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Oct 2021 21:11:08 +0200 Subject: [PATCH 411/440] mac80211: fix memory leaks with element parsing My previous commit 5d24828d05f3 ("mac80211: always allocate struct ieee802_11_elems") had a few bugs and leaked the new allocated struct in a few error cases, fix that. Fixes: 5d24828d05f3 ("mac80211: always allocate struct ieee802_11_elems") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20211001211108.9839928e42e0.Ib81ca187d3d3af7ed1bfeac2e00d08a4637c8025@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 3 ++- net/mac80211/ibss.c | 10 +++++----- net/mac80211/mlme.c | 36 ++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 94c65def102c..470ff0ce3dc7 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -498,13 +498,14 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, ies_len, true, mgmt->bssid, NULL); if (!elems || elems->parse_error) - return; + goto free; } __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, buf_size, true, false, elems ? elems->addba_ext_ie : NULL); +free: kfree(elems); } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 66b00046f0c2..0416c4d22292 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1659,11 +1659,11 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.chan_switch.variable, ies_len, true, mgmt->bssid, NULL); - if (!elems || elems->parse_error) - break; - - ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len, - rx_status, elems); + if (elems && !elems->parse_error) + ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, + skb->len, + rx_status, + elems); kfree(elems); break; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ec183a92a01..40b29cfb7cfe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3353,8 +3353,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC); rcu_read_unlock(); - if (!bss_ies) - return false; + if (!bss_ies) { + ret = false; + goto out; + } bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len, false, mgmt->bssid, @@ -4331,13 +4333,11 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.chan_switch.variable, ies_len, true, mgmt->bssid, NULL); - if (!elems || elems->parse_error) - break; - - ieee80211_sta_process_chanswitch(sdata, - rx_status->mactime, - rx_status->device_timestamp, - elems, false); + if (elems && !elems->parse_error) + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + rx_status->device_timestamp, + elems, false); kfree(elems); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { struct ieee802_11_elems *elems; @@ -4357,17 +4357,17 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.ext_chan_switch.variable, ies_len, true, mgmt->bssid, NULL); - if (!elems || elems->parse_error) - break; + if (elems && !elems->parse_error) { + /* for the handling code pretend it was an IE */ + elems->ext_chansw_ie = + &mgmt->u.action.u.ext_chan_switch.data; - /* for the handling code pretend this was also an IE */ - elems->ext_chansw_ie = - &mgmt->u.action.u.ext_chan_switch.data; + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + rx_status->device_timestamp, + elems, false); + } - ieee80211_sta_process_chanswitch(sdata, - rx_status->mactime, - rx_status->device_timestamp, - elems, false); kfree(elems); } break; From ba9d0db9a5ccf96029bd16f08e8afbd63590adbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:28 +0200 Subject: [PATCH 412/440] mac80211: fils: use cfg80211_find_ext_elem() Replace the use of cfg80211_find_ext_ie() with the more structured cfg80211_find_ext_elem(). Link: https://lore.kernel.org/r/20210930131130.17ecf37f0605.I853c2f9c2117a713deca9b8deb3552796d98ffac@changeid Signed-off-by: Johannes Berg --- net/mac80211/fils_aead.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c index a13ae148937e..e1d4cfd99128 100644 --- a/net/mac80211/fils_aead.c +++ b/net/mac80211/fils_aead.c @@ -219,7 +219,8 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, { struct ieee80211_mgmt *mgmt = (void *)skb->data; u8 *capab, *ies, *encr; - const u8 *addr[5 + 1], *session; + const u8 *addr[5 + 1]; + const struct element *session; size_t len[5 + 1]; size_t crypt_len; @@ -231,12 +232,12 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, ies = mgmt->u.assoc_req.variable; } - session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, - ies, skb->data + skb->len - ies); - if (!session || session[1] != 1 + 8) + session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION, + ies, skb->data + skb->len - ies); + if (!session || session->datalen != 1 + 8) return -EINVAL; /* encrypt after FILS Session element */ - encr = (u8 *)session + 2 + 1 + 8; + encr = (u8 *)session->data + 1 + 8; /* AES-SIV AAD vectors */ @@ -270,7 +271,8 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgmt *mgmt = (void *)frame; u8 *capab, *ies, *encr; - const u8 *addr[5 + 1], *session; + const u8 *addr[5 + 1]; + const struct element *session; size_t len[5 + 1]; int res; size_t crypt_len; @@ -280,16 +282,16 @@ int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, capab = (u8 *)&mgmt->u.assoc_resp.capab_info; ies = mgmt->u.assoc_resp.variable; - session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, - ies, frame + *frame_len - ies); - if (!session || session[1] != 1 + 8) { + session = cfg80211_find_ext_elem(WLAN_EID_EXT_FILS_SESSION, + ies, frame + *frame_len - ies); + if (!session || session->datalen != 1 + 8) { mlme_dbg(sdata, "No (valid) FILS Session element in (Re)Association Response frame from %pM", mgmt->sa); return -EINVAL; } /* decrypt after FILS Session element */ - encr = (u8 *)session + 2 + 1 + 8; + encr = (u8 *)session->data + 1 + 8; /* AES-SIV AAD vectors */ From 153e2a11c99b7382597614ddb995618c22aa1b5f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:29 +0200 Subject: [PATCH 413/440] nl80211: use element finding functions The element finding functions are safer, so use them instead of the "find_ie" functions. Link: https://lore.kernel.org/r/20210930131130.b838f139cc8e.I2b641262d3fc6e0d498719bf343fdc1c0833b845@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3f37e4d5c5d2..e72efe146d58 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5338,21 +5338,21 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, } static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, - const u8 *rates) + const struct element *rates) { int i; if (!rates) return; - for (i = 0; i < rates[1]; i++) { - if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + for (i = 0; i < rates->datalen; i++) { + if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) params->ht_required = true; - if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) + if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) params->vht_required = true; - if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) + if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) params->he_required = true; - if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) + if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) params->sae_h2e_required = true; } } @@ -5367,27 +5367,27 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) const struct cfg80211_beacon_data *bcn = ¶ms->beacon; size_t ies_len = bcn->tail_len; const u8 *ies = bcn->tail; - const u8 *rates; - const u8 *cap; + const struct element *rates; + const struct element *cap; - rates = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies, ies_len); + rates = cfg80211_find_elem(WLAN_EID_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); - rates = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); + rates = cfg80211_find_elem(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); nl80211_check_ap_rate_selectors(params, rates); - cap = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies, ies_len); - if (cap && cap[1] >= sizeof(*params->ht_cap)) - params->ht_cap = (void *)(cap + 2); - cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len); - if (cap && cap[1] >= sizeof(*params->vht_cap)) - params->vht_cap = (void *)(cap + 2); - cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); - if (cap && cap[1] >= sizeof(*params->he_cap) + 1) - params->he_cap = (void *)(cap + 3); - cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); - if (cap && cap[1] >= sizeof(*params->he_oper) + 1) - params->he_oper = (void *)(cap + 3); + cap = cfg80211_find_elem(WLAN_EID_HT_CAPABILITY, ies, ies_len); + if (cap && cap->datalen >= sizeof(*params->ht_cap)) + params->ht_cap = (void *)cap->data; + cap = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, ies, ies_len); + if (cap && cap->datalen >= sizeof(*params->vht_cap)) + params->vht_cap = (void *)cap->data; + cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len); + if (cap && cap->datalen >= sizeof(*params->he_cap) + 1) + params->he_cap = (void *)(cap->data + 1); + cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len); + if (cap && cap->datalen >= sizeof(*params->he_oper) + 1) + params->he_oper = (void *)(cap->data + 1); } static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, From a3eca81792977954b5beb19bc05fd1b8f859b856 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:30 +0200 Subject: [PATCH 414/440] cfg80211: scan: use element finding functions in easy cases There are a few easy cases where we only check for NULL or have just simple use of the result, this can be done with the element finding functions instead. Link: https://lore.kernel.org/r/20210930131130.f27c8a7ec264.Iadb03c4307e9216e080ce513e8ad4048cd020b25@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 11c68b159324..0273f20291c4 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -383,7 +383,7 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, const u8 *ssid, size_t ssid_len) { const struct cfg80211_bss_ies *ies; - const u8 *ssidie; + const struct element *ssid_elem; if (bssid && !ether_addr_equal(a->bssid, bssid)) return false; @@ -394,12 +394,12 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, ies = rcu_access_pointer(a->ies); if (!ies) return false; - ssidie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); - if (!ssidie) + ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, ies->data, ies->len); + if (!ssid_elem) return false; - if (ssidie[1] != ssid_len) + if (ssid_elem->datalen != ssid_len) return false; - return memcmp(ssidie + 2, ssid, ssid_len) == 0; + return memcmp(ssid_elem->data, ssid, ssid_len) == 0; } static int @@ -2072,12 +2072,12 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, if (!non_tx_data) return; - if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + if (!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return; if (!wiphy->support_mbssid) return; if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) + !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) return; new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp); @@ -2444,10 +2444,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); if (!res || !wiphy->support_mbssid || - !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; if (wiphy->support_only_he_mbssid && - !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) + !cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen)) return res; non_tx_data.tx_bss = res; From f2622138f9352a2be7a45dfe430c88bbfe6218d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:31 +0200 Subject: [PATCH 415/440] mac80211: use ieee80211_bss_get_elem() in most places There are a number of uses of ieee80211_bss_get_ie(), replace most of them with ieee80211_bss_get_elem(). Link: https://lore.kernel.org/r/20210930131130.9a413f12a151.I0699ba7e48c9d88dbbfa3107cf4d34a8345d02a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 40b29cfb7cfe..ac32a1998a7e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2514,7 +2514,7 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata, static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - const u8 *ssid; + const struct element *ssid; u8 *dst = ifmgd->associated->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; @@ -2551,14 +2551,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) int ssid_len; rcu_read_lock(); - ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID); if (WARN_ON_ONCE(ssid == NULL)) ssid_len = 0; else - ssid_len = ssid[1]; + ssid_len = ssid->datalen; ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst, - ssid + 2, ssid_len, + ssid->data, ssid_len, ifmgd->associated->channel); rcu_read_unlock(); } @@ -2634,7 +2634,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss; struct sk_buff *skb; - const u8 *ssid; + const struct element *ssid; int ssid_len; if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) @@ -2652,16 +2652,17 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, return NULL; rcu_read_lock(); - ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); - if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, - "invalid SSID element (len=%d)", ssid ? ssid[1] : -1)) + ssid = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); + if (WARN_ONCE(!ssid || ssid->datalen > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", + ssid ? ssid->datalen : -1)) ssid_len = 0; else - ssid_len = ssid[1]; + ssid_len = ssid->datalen; skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid, (u32) -1, cbss->channel, - ssid + 2, ssid_len, + ssid->data, ssid_len, NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED); rcu_read_unlock(); @@ -5538,7 +5539,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; - const u8 *ssidie, *ht_ie, *vht_ie; + const struct element *ssid_elem, *ht_elem, *vht_elem; int i, err; bool override = false; @@ -5547,14 +5548,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return -ENOMEM; rcu_read_lock(); - ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { + ssid_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_SSID); + if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; } - memcpy(assoc_data->ssid, ssidie + 2, ssidie[1]); - assoc_data->ssid_len = ssidie[1]; + memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen); + assoc_data->ssid_len = ssid_elem->datalen; memcpy(bss_conf->ssid, assoc_data->ssid, assoc_data->ssid_len); bss_conf->ssid_len = assoc_data->ssid_len; rcu_read_unlock(); @@ -5668,15 +5669,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->supp_rates_len = bss->supp_rates_len; rcu_read_lock(); - ht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_OPERATION); - if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation)) + ht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_HT_OPERATION); + if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation)) assoc_data->ap_ht_param = - ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; + ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param; else if (!is_6ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_HT; - vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); - if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) - memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, + vht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_elem->data, sizeof(struct ieee80211_vht_cap)); else if (is_5ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT | From 97981d89a1d47942a2d7517631d2400b99fe3f93 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Thu, 30 Sep 2021 04:15:33 -0400 Subject: [PATCH 416/440] cfg80211: separate get channel number from ies Get channel number from ies is a common logic, so separate it to a new function, which could also be used by lower driver. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20210930081533.4898-1-wgong@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/wireless/scan.c | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 02737dde06d9..241b29b0796e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6376,6 +6376,17 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, u64_to_ether_addr(new_bssid_u64, new_bssid); } +/** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * + * Returns the channel number, or -1 if none could be determined. + */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band); + /** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0273f20291c4..e4f79b23f7f6 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1791,25 +1791,13 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } -/* - * Update RX channel information based on the available frame payload - * information. This is mainly for the 2.4 GHz band where frames can be received - * from neighboring channels and the Beacon frames use the DSSS Parameter Set - * element to indicate the current (transmitting) channel, but this might also - * be needed on other bands if RX frequency does not match with the actual - * operating channel of a BSS. - */ -static struct ieee80211_channel * -cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width) +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band) { const u8 *tmp; - u32 freq; int channel_number = -1; - struct ieee80211_channel *alt_channel; - if (channel->band == NL80211_BAND_S1GHZ) { + if (band == NL80211_BAND_S1GHZ) { tmp = cfg80211_find_ie(WLAN_EID_S1G_OPERATION, ie, ielen); if (tmp && tmp[1] >= sizeof(struct ieee80211_s1g_oper_ie)) { struct ieee80211_s1g_oper_ie *s1gop = (void *)(tmp + 2); @@ -1830,6 +1818,29 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } + return channel_number; +} +EXPORT_SYMBOL(cfg80211_get_ies_channel_number); + +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ +static struct ieee80211_channel * +cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) +{ + u32 freq; + int channel_number; + struct ieee80211_channel *alt_channel; + + channel_number = cfg80211_get_ies_channel_number(ie, ielen, channel->band); + if (channel_number < 0) { /* No channel information in frame payload */ return channel; From a6e34fde48e8af923284d56d610b6c4a7035e514 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Tue, 5 Oct 2021 21:09:36 -0700 Subject: [PATCH 417/440] mac80211: split beacon retrieval functions Split __ieee80211_beacon_get() into a separate function for AP mode ieee80211_beacon_get_ap(). Also, move the code common to all modes (AP, adhoc and mesh) to a separate function ieee80211_beacon_get_finish(). Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20211006040938.9531-2-alokad@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 203 +++++++++++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 85 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2d1193ed3eb5..ac9ab007dc6f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4979,6 +4979,115 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, return 0; } +static void +ieee80211_beacon_get_finish(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + struct beacon_data *beacon, + struct sk_buff *skb, + struct ieee80211_chanctx_conf *chanctx_conf, + u16 csa_off_base) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_tx_info *info; + enum nl80211_band band; + struct ieee80211_tx_rate_control txrc; + + /* CSA offsets */ + if (offs && beacon) { + u16 i; + + for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) { + u16 csa_off = beacon->cntdwn_counter_offsets[i]; + + if (!csa_off) + continue; + + offs->cntdwn_counter_offs[i] = csa_off_base + csa_off; + } + } + + band = chanctx_conf->def.chan->band; + info = IEEE80211_SKB_CB(skb); + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_NO_ACK; + info->band = band; + + memset(&txrc, 0, sizeof(txrc)); + txrc.hw = hw; + txrc.sband = local->hw.wiphy->bands[band]; + txrc.bss_conf = &sdata->vif.bss_conf; + txrc.skb = skb; + txrc.reported_rate.idx = -1; + if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band]) + txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band]; + else + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + txrc.bss = true; + rate_control_get_rate(sdata, NULL, &txrc); + + info->control.vif = vif; + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_ASSIGN_SEQ | + IEEE80211_TX_CTL_FIRST_FRAGMENT; +} + +static struct sk_buff * +ieee80211_beacon_get_ap(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + bool is_template, + struct beacon_data *beacon, + struct ieee80211_chanctx_conf *chanctx_conf) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_if_ap *ap = &sdata->u.ap; + struct sk_buff *skb = NULL; + u16 csa_off_base = 0; + + if (beacon->cntdwn_counter_offsets[0]) { + if (!is_template) + ieee80211_beacon_update_cntdwn(vif); + + ieee80211_set_beacon_cntdwn(sdata, beacon); + } + + /* headroom, head length, + * tail length and maximum TIM length + */ + skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + + beacon->tail_len + 256 + + local->hw.extra_beacon_tailroom); + if (!skb) + return NULL; + + skb_reserve(skb, local->tx_headroom); + skb_put_data(skb, beacon->head, beacon->head_len); + + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, is_template); + + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; + } + + if (beacon->tail) + skb_put_data(skb, beacon->tail, beacon->tail_len); + + if (ieee80211_beacon_protect(skb, local, sdata) < 0) + return NULL; + + ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, chanctx_conf, + csa_off_base); + return skb; +} + static struct sk_buff * __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4988,12 +5097,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct beacon_data *beacon = NULL; struct sk_buff *skb = NULL; - struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; - enum nl80211_band band; - struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; - int csa_off_base = 0; rcu_read_lock(); @@ -5010,48 +5115,11 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_if_ap *ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); - if (beacon) { - if (beacon->cntdwn_counter_offsets[0]) { - if (!is_template) - ieee80211_beacon_update_cntdwn(vif); - - ieee80211_set_beacon_cntdwn(sdata, beacon); - } - - /* - * headroom, head length, - * tail length and maximum TIM length - */ - skb = dev_alloc_skb(local->tx_headroom + - beacon->head_len + - beacon->tail_len + 256 + - local->hw.extra_beacon_tailroom); - if (!skb) - goto out; - - skb_reserve(skb, local->tx_headroom); - skb_put_data(skb, beacon->head, beacon->head_len); - - ieee80211_beacon_add_tim(sdata, &ap->ps, skb, - is_template); - - if (offs) { - offs->tim_offset = beacon->head_len; - offs->tim_length = skb->len - beacon->head_len; - offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; - - /* for AP the csa offsets are from tail */ - csa_off_base = skb->len; - } - - if (beacon->tail) - skb_put_data(skb, beacon->tail, - beacon->tail_len); - - if (ieee80211_beacon_protect(skb, local, sdata) < 0) - goto out; - } else + if (!beacon) goto out; + + skb = ieee80211_beacon_get_ap(hw, vif, offs, is_template, + beacon, chanctx_conf); } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; @@ -5077,6 +5145,9 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, hdr = (struct ieee80211_hdr *) skb->data; hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); + + ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, + chanctx_conf, 0); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -5116,51 +5187,13 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, } skb_put_data(skb, beacon->tail, beacon->tail_len); + ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, + chanctx_conf, 0); } else { WARN_ON(1); goto out; } - /* CSA offsets */ - if (offs && beacon) { - int i; - - for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; i++) { - u16 csa_off = beacon->cntdwn_counter_offsets[i]; - - if (!csa_off) - continue; - - offs->cntdwn_counter_offs[i] = csa_off_base + csa_off; - } - } - - band = chanctx_conf->def.chan->band; - - info = IEEE80211_SKB_CB(skb); - - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - info->flags |= IEEE80211_TX_CTL_NO_ACK; - info->band = band; - - memset(&txrc, 0, sizeof(txrc)); - txrc.hw = hw; - txrc.sband = local->hw.wiphy->bands[band]; - txrc.bss_conf = &sdata->vif.bss_conf; - txrc.skb = skb; - txrc.reported_rate.idx = -1; - if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band]) - txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band]; - else - txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; - txrc.bss = true; - rate_control_get_rate(sdata, NULL, &txrc); - - info->control.vif = vif; - - info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT | - IEEE80211_TX_CTL_ASSIGN_SEQ | - IEEE80211_TX_CTL_FIRST_FRAGMENT; out: rcu_read_unlock(); return skb; From 1add667da24273631d4b1f5529789ec5253227f6 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 20 Oct 2021 08:11:47 +0300 Subject: [PATCH 418/440] nl80211: vendor-cmd: intel: add more details for IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP Explain more the expected flow for this command. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20211020051147.29297-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211-vnd-intel.h | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h index 0bf177b84fd9..4ed7d0b24512 100644 --- a/include/uapi/linux/nl80211-vnd-intel.h +++ b/include/uapi/linux/nl80211-vnd-intel.h @@ -13,6 +13,35 @@ * enum iwl_mvm_vendor_cmd - supported vendor commands * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info. * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device. + * This is useful when the CSME firmware owns the device and the kernel + * wants to use it. In case the CSME firmware has no connection active the + * kernel will manage on its own to get ownership of the device. + * When the CSME firmware has an active connection, the user space + * involvement is required. The kernel will assert the RFKILL signal with + * the "device not owned" reason so that nobody can touch the device. Then + * the user space can run the following flow to be able to get connected + * to the very same AP the CSME firmware is currently connected to: + * + * 1) The user space (NetworkManager) boots and sees that the device is + * in RFKILL because the host doesn't own the device + * 2) The user space asks the kernel what AP the CSME firmware is + * connected to (with %IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO) + * 3) The user space checks if it has a profile that matches the reply + * from the CSME firmware + * 4) The user space installs a network to the wpa_supplicant with a + * specific BSSID and a specific frequency + * 5) The user space prevents any type of full scan + * 6) The user space asks iwlmei to request ownership on the device (with + * this command) + * 7) iwlmei requests ownership from the CSME firmware + * 8) The CSME firmware grants ownership + * 9) iwlmei tells iwlwifi to lift the RFKILL + * 10) RFKILL OFF is reported to user space + * 11) The host boots the device, loads the firwmare, and connects to a + * specific BSSID without scanning including IP as fast as it can + * 12) The host reports to the CSME firmware that there is a connection + * 13) The TCP connection is preserved and the host has connectivity + * * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed. * It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a * &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes. From 63fa04266629b9559d66c4dc18b03e0f9fc04a02 Mon Sep 17 00:00:00 2001 From: Srinivasan Raju Date: Mon, 18 Oct 2021 11:00:54 +0100 Subject: [PATCH 419/440] nl80211: Add LC placeholder band definition to nl80211_band Define LC band which is a draft under IEEE 802.11bb. Current NL80211_BAND_LC is a placeholder band and will be more defined IEEE 802.11bb progresses. Signed-off-by: Srinivasan Raju Link: https://lore.kernel.org/r/20211018100143.7565-2-srini.raju@purelifi.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/mac80211/mlme.c | 1 + net/mac80211/sta_info.c | 1 + net/mac80211/tx.c | 3 ++- net/wireless/nl80211.c | 1 + net/wireless/util.c | 2 ++ 6 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index eda608b1eb09..6b816ef0155f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4978,6 +4978,7 @@ enum nl80211_txrate_gi { * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs + * @NL80211_BAND_LC: light communication band (placeholder) * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace * since newer kernel versions may support more bands */ @@ -4987,6 +4988,7 @@ enum nl80211_band { NL80211_BAND_60GHZ, NL80211_BAND_6GHZ, NL80211_BAND_S1GHZ, + NL80211_BAND_LC, NUM_NL80211_BANDS, }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac32a1998a7e..16ef7396b6da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1490,6 +1490,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, fallthrough; case NL80211_BAND_2GHZ: case NL80211_BAND_60GHZ: + case NL80211_BAND_LC: chan_increment = 1; break; case NL80211_BAND_5GHZ: diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2b5acb37587f..36524101d11f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -444,6 +444,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, switch (i) { case NL80211_BAND_2GHZ: + case NL80211_BAND_LC: /* * We use both here, even if we cannot really know for * sure the station will support both, but the only use diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ac9ab007dc6f..5c426b093ee2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -146,7 +146,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rate = DIV_ROUND_UP(r->bitrate, 1 << shift); switch (sband->band) { - case NL80211_BAND_2GHZ: { + case NL80211_BAND_2GHZ: + case NL80211_BAND_LC: { u32 flag; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) flag = IEEE80211_RATE_MANDATORY_G; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e72efe146d58..81232b73df8f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -866,6 +866,7 @@ nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, [NL80211_BAND_6GHZ] = { .type = NLA_S32 }, [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, + [NL80211_BAND_LC] = { .type = NLA_S32 }, }; static const struct nla_policy diff --git a/net/wireless/util.c b/net/wireless/util.c index 18dba3d7c638..2991f711491a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -80,6 +80,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) return 0; /* not supported */ switch (band) { case NL80211_BAND_2GHZ: + case NL80211_BAND_LC: if (chan == 14) return MHZ_TO_KHZ(2484); else if (chan < 14) @@ -209,6 +210,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) WARN_ON(want); break; case NL80211_BAND_2GHZ: + case NL80211_BAND_LC: want = 7; for (i = 0; i < sband->n_bitrates; i++) { switch (sband->bitrates[i].bitrate) { From b33fb28c867d7c86df3bdd257b0320ed148e3dc3 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 21 Oct 2021 10:45:27 +0200 Subject: [PATCH 420/440] mac80211: Prevent AP probing during suspend Submitting AP probe/null during suspend can cause unexpected disconnect on resume because of timeout waiting for ack status: wlan0: Failed to send nullfunc to AP 11:22:33:44:55:66 after 500ms, disconnecting This is especially the case when we enter suspend when a scan is ongoing, indeed, scan is cancelled from __ieee80211_suspend, leading to a corresponding (aborted) scan complete event, which in turn causes the submission of an immediate monitor null frame (restart_sta_timer). The corresponding packet or ack will not be processed before resuming, causing a timeout & disconnect on resume. Delay the AP probing when suspending/suspended. Signed-off-by: Loic Poulain Link: https://lore.kernel.org/r/1634805927-1113-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/mlme.c | 7 +++++++ net/mac80211/pm.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c3b8590a7e90..5666bbb8860b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1241,6 +1241,9 @@ struct ieee80211_local { */ bool suspended; + /* suspending is true during the whole suspend process */ + bool suspending; + /* * Resuming is true while suspended, but when we're reprogramming the * hardware -- at that time it's allowed to use ieee80211_queue_work() diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 16ef7396b6da..54ab0e1ef6ca 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2589,6 +2589,13 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; } + if (sdata->local->suspending) { + /* reschedule after resume */ + mutex_unlock(&sdata->local->mtx); + ieee80211_reset_ap_probe(sdata); + goto out; + } + if (beacon) { mlme_dbg_ratelimited(sdata, "detected beacon loss from AP (missed %d beacons) - probing\n", diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7809a906d7fe..0ccb5701c7f3 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -27,6 +27,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (!local->open_count) goto suspend; + local->suspending = true; + mb(); /* make suspending visible before any cancellation */ + ieee80211_scan_cancel(local); ieee80211_dfs_cac_cancel(local); @@ -176,6 +179,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* need suspended to be visible before quiescing is false */ barrier(); local->quiescing = false; + local->suspending = false; return 0; } From f9d366d420af4ce8719c59e60853573c02831f61 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Oct 2021 17:30:39 +0200 Subject: [PATCH 421/440] cfg80211: fix kernel-doc for MBSSID EMA The struct member ema_max_profile_periodicity was listed with the wrong name in the kernel-doc, fix that. Link: https://lore.kernel.org/r/20211021173038.18ec2030c66b.Iac731bb299525940948adad2c41f514b7dd81c47@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- include/uapi/linux/nl80211.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 241b29b0796e..7c9d5db4f0e6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5041,7 +5041,7 @@ struct wiphy_iftype_akm_suites { * @mbssid_max_interfaces: maximum number of interfaces supported by the driver * in a multiple BSSID set. This field must be set to a non-zero value * by the driver to advertise MBSSID support. - * @mbssid_max_ema_profile_periodicity: maximum profile periodicity supported by + * @ema_max_profile_periodicity: maximum profile periodicity supported by * the driver. Setting this field to a non-zero value indicates that the * driver supports enhanced multi-BSSID advertisements (EMA AP). */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6b816ef0155f..61cab81e920d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7424,7 +7424,7 @@ enum nl80211_sar_specs_attrs { * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel * to advertise the maximum profile periodicity supported by the driver * if EMA is enabled. Driver should indicate EMA support to the userspace - * by setting wiphy->mbssid_max_ema_profile_periodicity to + * by setting wiphy->ema_max_profile_periodicity to * a non-zero value. * * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of @@ -7443,7 +7443,7 @@ enum nl80211_sar_specs_attrs { * * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature. * Setting this flag is permitted only if the driver advertises EMA support - * by setting wiphy->mbssid_max_ema_profile_periodicity to non-zero. + * by setting wiphy->ema_max_profile_periodicity to non-zero. * * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute From ae77bdbc2fc63bdaa58b64ed06c51e4052631902 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:42:19 +0300 Subject: [PATCH 422/440] net: enetc: remove local "priv" variable in enetc_clean_tx_ring() The "priv" variable is needed in the "check_writeback" scope since commit d39823121911 ("enetc: add hardware timestamping support"). Since commit 7294380c5211 ("enetc: support PTP Sync packet one-step timestamping"), we also need "priv" in the larger function scope. So the local variable from the "if" block scope is not needed, and actually shadows the other one. Delete it. Signed-off-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 8e31fe15bf3c..082b94e482ce 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -813,10 +813,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) bool is_eof = tx_swbd->is_eof; if (unlikely(tx_swbd->check_wb)) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); - union enetc_tx_bd *txbd; - - txbd = ENETC_TXBD(*tx_ring, i); + union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i); if (txbd->flags & ENETC_TXBD_FLAGS_W && tx_swbd->do_twostep_tstamp) { From 520661495409666771d6e4be683ce5e4854c60f5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:42:20 +0300 Subject: [PATCH 423/440] net: enetc: use the skb variable directly in enetc_clean_tx_ring() The code checks whether the skb had one-step TX timestamping enabled, in order to schedule the work item for emptying the priv->tx_skbs queue. That code checks for "tx_swbd->skb" directly, when we already had a skb retrieved using enetc_tx_swbd_get_skb(tx_swbd) - a TX software BD can also hold an XDP_TX packet or an XDP frame. But since the direct tx_swbd dereference is in an "if" block guarded by the non-NULL quality of "skb", accessing "tx_swbd->skb" directly is not wrong, just confusing. Just use the local variable named "skb". Signed-off-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 082b94e482ce..504e12554079 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -831,8 +831,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (xdp_frame) { xdp_return_frame(xdp_frame); } else if (skb) { - if (unlikely(tx_swbd->skb->cb[0] & - ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { + if (unlikely(skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { /* Start work to release lock for next one-step * timestamping packet. And send one skb in * tx_skbs queue if has. From 4def0acb63cef1b286f5e1fa120112b587becd8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Wed, 20 Oct 2021 20:24:20 +0200 Subject: [PATCH 424/440] dt-bindings: vendor-prefixes: Add asix prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the prefix for ASIX Electronics Corporation. Signed-off-by: Łukasz Stelmach Reviewed-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index a867f7102c35..e2eb0c738f3a 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -131,6 +131,8 @@ patternProperties: description: Asahi Kasei Corp. "^asc,.*": description: All Sensors Corporation + "^asix,.*": + description: ASIX Electronics Corporation "^aspeed,.*": description: ASPEED Technology Inc. "^asus,.*": From b13c7a88a7b666f4f7b6c5c834fcdee5b1bb31d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Wed, 20 Oct 2021 20:24:21 +0200 Subject: [PATCH 425/440] dt-bindings: net: Add bindings for AX88796C SPI Ethernet Adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add bindings for AX88796C SPI Ethernet Adapter. Signed-off-by: Łukasz Stelmach Reviewed-by: Rob Herring Acked-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- .../bindings/net/asix,ax88796c.yaml | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/asix,ax88796c.yaml diff --git a/Documentation/devicetree/bindings/net/asix,ax88796c.yaml b/Documentation/devicetree/bindings/net/asix,ax88796c.yaml new file mode 100644 index 000000000000..699ebf452479 --- /dev/null +++ b/Documentation/devicetree/bindings/net/asix,ax88796c.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/asix,ax88796c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ASIX AX88796C SPI Ethernet Adapter + +maintainers: + - Łukasz Stelmach + +description: | + ASIX AX88796C is an Ethernet controller with a built in PHY. This + describes SPI mode of the chip. + + The node for this driver must be a child node of an SPI controller, + hence all mandatory properties described in + ../spi/spi-controller.yaml must be specified. + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + const: asix,ax88796c + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 40000000 + + interrupts: + maxItems: 1 + + reset-gpios: + description: + A GPIO line handling reset of the chip. As the line is active low, + it should be marked GPIO_ACTIVE_LOW. + maxItems: 1 + + local-mac-address: true + + mac-address: true + +required: + - compatible + - reg + - spi-max-frequency + - interrupts + - reset-gpios + +additionalProperties: false + +examples: + # Artik5 eval board + - | + #include + #include + spi0 { + #address-cells = <1>; + #size-cells = <0>; + + ethernet@0 { + compatible = "asix,ax88796c"; + reg = <0x0>; + local-mac-address = [00 00 00 00 00 00]; /* Filled in by a bootloader */ + interrupt-parent = <&gpx2>; + interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + spi-max-frequency = <40000000>; + reset-gpios = <&gpe0 2 GPIO_ACTIVE_LOW>; + }; + }; From a97c69ba4f30e46abb9cc7be8b98227cb468fdf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Wed, 20 Oct 2021 20:24:22 +0200 Subject: [PATCH 426/440] net: ax88796c: ASIX AX88796C SPI Ethernet Adapter Driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASIX AX88796[1] is a versatile ethernet adapter chip, that can be connected to a CPU with a 8/16-bit bus or with an SPI. This driver supports SPI connection. The driver has been ported from the vendor kernel for ARTIK5[2] boards. Several changes were made to adapt it to the current kernel which include: + updated DT configuration, + clock configuration moved to DT, + new timer, ethtool and gpio APIs, + dev_* instead of pr_* and custom printk() wrappers, + removed awkward vendor power managemtn. + introduced ethtool tunable to control SPI compression [1] https://www.asix.com.tw/products.php?op=pItemdetail&PItemID=104;65;86&PLine=65 [2] https://git.tizen.org/cgit/profile/common/platform/kernel/linux-3.10-artik/ The other ax88796 driver is for NE2000 compatible AX88796L chip. These chips are not compatible. Hence, two separate drivers are required. Signed-off-by: Łukasz Stelmach Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 + drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/asix/Kconfig | 35 + drivers/net/ethernet/asix/Makefile | 6 + drivers/net/ethernet/asix/ax88796c_ioctl.c | 239 ++++ drivers/net/ethernet/asix/ax88796c_ioctl.h | 26 + drivers/net/ethernet/asix/ax88796c_main.c | 1163 ++++++++++++++++++++ drivers/net/ethernet/asix/ax88796c_main.h | 568 ++++++++++ drivers/net/ethernet/asix/ax88796c_spi.c | 115 ++ drivers/net/ethernet/asix/ax88796c_spi.h | 69 ++ 11 files changed, 2229 insertions(+) create mode 100644 drivers/net/ethernet/asix/Kconfig create mode 100644 drivers/net/ethernet/asix/Makefile create mode 100644 drivers/net/ethernet/asix/ax88796c_ioctl.c create mode 100644 drivers/net/ethernet/asix/ax88796c_ioctl.h create mode 100644 drivers/net/ethernet/asix/ax88796c_main.c create mode 100644 drivers/net/ethernet/asix/ax88796c_main.h create mode 100644 drivers/net/ethernet/asix/ax88796c_spi.c create mode 100644 drivers/net/ethernet/asix/ax88796c_spi.h diff --git a/MAINTAINERS b/MAINTAINERS index e107a81b0403..20ea3338404f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2899,6 +2899,12 @@ S: Maintained F: Documentation/hwmon/asc7621.rst F: drivers/hwmon/asc7621.c +ASIX AX88796C SPI ETHERNET ADAPTER +M: Łukasz Stelmach +S: Maintained +F: Documentation/devicetree/bindings/net/asix,ax88796c.yaml +F: drivers/net/ethernet/asix/ax88796c_* + ASPEED PINCTRL DRIVERS M: Andrew Jeffery L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 412ae3e43ffb..4601b38f532a 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -33,6 +33,7 @@ source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/aquantia/Kconfig" source "drivers/net/ethernet/arc/Kconfig" +source "drivers/net/ethernet/asix/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" source "drivers/net/ethernet/brocade/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index aaa5078cd7d1..fdd8c6c17451 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ +obj-$(CONFIG_NET_VENDOR_ASIX) += asix/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ diff --git a/drivers/net/ethernet/asix/Kconfig b/drivers/net/ethernet/asix/Kconfig new file mode 100644 index 000000000000..eed02453314c --- /dev/null +++ b/drivers/net/ethernet/asix/Kconfig @@ -0,0 +1,35 @@ +# +# Asix network device configuration +# + +config NET_VENDOR_ASIX + bool "Asix devices" + default y + help + If you have a network (Ethernet, non-USB, not NE2000 compatible) + interface based on a chip from ASIX, say Y. + +if NET_VENDOR_ASIX + +config SPI_AX88796C + tristate "Asix AX88796C-SPI support" + select PHYLIB + depends on SPI + depends on GPIOLIB + help + Say Y here if you intend to use ASIX AX88796C attached in SPI mode. + +config SPI_AX88796C_COMPRESSION + bool "SPI transfer compression" + default n + depends on SPI_AX88796C + help + Say Y here to enable SPI transfer compression. It saves up + to 24 dummy cycles during each transfer which may noticeably + speed up short transfers. This sets the default value that is + inherited by network interfaces during probe. It can be + changed at run time via spi-compression ethtool tunable. + + If unsure say N. + +endif # NET_VENDOR_ASIX diff --git a/drivers/net/ethernet/asix/Makefile b/drivers/net/ethernet/asix/Makefile new file mode 100644 index 000000000000..0bfbbb042634 --- /dev/null +++ b/drivers/net/ethernet/asix/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the Asix network device drivers. +# + +obj-$(CONFIG_SPI_AX88796C) += ax88796c.o +ax88796c-y := ax88796c_main.o ax88796c_ioctl.o ax88796c_spi.o diff --git a/drivers/net/ethernet/asix/ax88796c_ioctl.c b/drivers/net/ethernet/asix/ax88796c_ioctl.c new file mode 100644 index 000000000000..916ae380a004 --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_ioctl.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#define pr_fmt(fmt) "ax88796c: " fmt + +#include +#include +#include +#include + +#include "ax88796c_main.h" +#include "ax88796c_ioctl.h" + +static const char ax88796c_priv_flag_names[][ETH_GSTRING_LEN] = { + "SPICompression", +}; + +static void +ax88796c_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) +{ + /* Inherit standard device info */ + strncpy(info->driver, DRV_NAME, sizeof(info->driver)); +} + +static u32 ax88796c_get_msglevel(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + return ax_local->msg_enable; +} + +static void ax88796c_set_msglevel(struct net_device *ndev, u32 level) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + ax_local->msg_enable = level; +} + +static void +ax88796c_get_pauseparam(struct net_device *ndev, struct ethtool_pauseparam *pause) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + pause->tx_pause = !!(ax_local->flowctrl & AX_FC_TX); + pause->rx_pause = !!(ax_local->flowctrl & AX_FC_RX); + pause->autoneg = (ax_local->flowctrl & AX_FC_ANEG) ? + AUTONEG_ENABLE : + AUTONEG_DISABLE; +} + +static int +ax88796c_set_pauseparam(struct net_device *ndev, struct ethtool_pauseparam *pause) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + int fc; + + /* The following logic comes from phylink_ethtool_set_pauseparam() */ + fc = pause->tx_pause ? AX_FC_TX : 0; + fc |= pause->rx_pause ? AX_FC_RX : 0; + fc |= pause->autoneg ? AX_FC_ANEG : 0; + + ax_local->flowctrl = fc; + + if (pause->autoneg) { + phy_set_asym_pause(ax_local->phydev, pause->tx_pause, + pause->rx_pause); + } else { + int maccr = 0; + + phy_set_asym_pause(ax_local->phydev, 0, 0); + maccr |= (ax_local->flowctrl & AX_FC_RX) ? MACCR_RXFC_ENABLE : 0; + maccr |= (ax_local->flowctrl & AX_FC_TX) ? MACCR_TXFC_ENABLE : 0; + + mutex_lock(&ax_local->spi_lock); + + maccr |= AX_READ(&ax_local->ax_spi, P0_MACCR) & + ~(MACCR_TXFC_ENABLE | MACCR_RXFC_ENABLE); + AX_WRITE(&ax_local->ax_spi, maccr, P0_MACCR); + + mutex_unlock(&ax_local->spi_lock); + } + + return 0; +} + +static int ax88796c_get_regs_len(struct net_device *ndev) +{ + return AX88796C_REGDUMP_LEN + AX88796C_PHY_REGDUMP_LEN; +} + +static void +ax88796c_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *_p) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + int offset, i; + u16 *p = _p; + + memset(p, 0, ax88796c_get_regs_len(ndev)); + + mutex_lock(&ax_local->spi_lock); + + for (offset = 0; offset < AX88796C_REGDUMP_LEN; offset += 2) { + if (!test_bit(offset / 2, ax88796c_no_regs_mask)) + *p = AX_READ(&ax_local->ax_spi, offset); + p++; + } + + mutex_unlock(&ax_local->spi_lock); + + for (i = 0; i < AX88796C_PHY_REGDUMP_LEN / 2; i++) { + *p = phy_read(ax_local->phydev, i); + p++; + } +} + +static void +ax88796c_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + memcpy(data, ax88796c_priv_flag_names, + sizeof(ax88796c_priv_flag_names)); + break; + } +} + +static int +ax88796c_get_sset_count(struct net_device *ndev, int stringset) +{ + int ret = 0; + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + ret = ARRAY_SIZE(ax88796c_priv_flag_names); + break; + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int ax88796c_set_priv_flags(struct net_device *ndev, u32 flags) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + if (flags & ~AX_PRIV_FLAGS_MASK) + return -EOPNOTSUPP; + + if ((ax_local->priv_flags ^ flags) & AX_CAP_COMP) + if (netif_running(ndev)) + return -EBUSY; + + ax_local->priv_flags = flags; + + return 0; +} + +static u32 ax88796c_get_priv_flags(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + return ax_local->priv_flags; +} + +int ax88796c_mdio_read(struct mii_bus *mdiobus, int phy_id, int loc) +{ + struct ax88796c_device *ax_local = mdiobus->priv; + int ret; + + mutex_lock(&ax_local->spi_lock); + AX_WRITE(&ax_local->ax_spi, MDIOCR_RADDR(loc) + | MDIOCR_FADDR(phy_id) | MDIOCR_READ, P2_MDIOCR); + + ret = read_poll_timeout(AX_READ, ret, + (ret != 0), + 0, jiffies_to_usecs(HZ / 100), false, + &ax_local->ax_spi, P2_MDIOCR); + if (!ret) + ret = AX_READ(&ax_local->ax_spi, P2_MDIODR); + + mutex_unlock(&ax_local->spi_lock); + + return ret; +} + +int +ax88796c_mdio_write(struct mii_bus *mdiobus, int phy_id, int loc, u16 val) +{ + struct ax88796c_device *ax_local = mdiobus->priv; + int ret; + + mutex_lock(&ax_local->spi_lock); + AX_WRITE(&ax_local->ax_spi, val, P2_MDIODR); + + AX_WRITE(&ax_local->ax_spi, + MDIOCR_RADDR(loc) | MDIOCR_FADDR(phy_id) + | MDIOCR_WRITE, P2_MDIOCR); + + ret = read_poll_timeout(AX_READ, ret, + ((ret & MDIOCR_VALID) != 0), 0, + jiffies_to_usecs(HZ / 100), false, + &ax_local->ax_spi, P2_MDIOCR); + mutex_unlock(&ax_local->spi_lock); + + return ret; +} + +const struct ethtool_ops ax88796c_ethtool_ops = { + .get_drvinfo = ax88796c_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_msglevel = ax88796c_get_msglevel, + .set_msglevel = ax88796c_set_msglevel, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .nway_reset = phy_ethtool_nway_reset, + .get_pauseparam = ax88796c_get_pauseparam, + .set_pauseparam = ax88796c_set_pauseparam, + .get_regs_len = ax88796c_get_regs_len, + .get_regs = ax88796c_get_regs, + .get_strings = ax88796c_get_strings, + .get_sset_count = ax88796c_get_sset_count, + .get_priv_flags = ax88796c_get_priv_flags, + .set_priv_flags = ax88796c_set_priv_flags, +}; + +int ax88796c_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + int ret; + + ret = phy_mii_ioctl(ndev->phydev, ifr, cmd); + + return ret; +} diff --git a/drivers/net/ethernet/asix/ax88796c_ioctl.h b/drivers/net/ethernet/asix/ax88796c_ioctl.h new file mode 100644 index 000000000000..34d2a7dcc5ef --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_ioctl.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#ifndef _AX88796C_IOCTL_H +#define _AX88796C_IOCTL_H + +#include +#include + +#include "ax88796c_main.h" + +extern const struct ethtool_ops ax88796c_ethtool_ops; + +bool ax88796c_check_power(const struct ax88796c_device *ax_local); +bool ax88796c_check_power_and_wake(struct ax88796c_device *ax_local); +void ax88796c_set_power_saving(struct ax88796c_device *ax_local, u8 ps_level); +int ax88796c_mdio_read(struct mii_bus *mdiobus, int phy_id, int loc); +int ax88796c_mdio_write(struct mii_bus *mdiobus, int phy_id, int loc, u16 val); +int ax88796c_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +#endif diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c new file mode 100644 index 000000000000..cfc597f72e3d --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -0,0 +1,1163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#define pr_fmt(fmt) "ax88796c: " fmt + +#include "ax88796c_main.h" +#include "ax88796c_ioctl.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int comp = IS_ENABLED(CONFIG_SPI_AX88796C_COMPRESSION); +static int msg_enable = NETIF_MSG_PROBE | + NETIF_MSG_LINK | + NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR; + +static const char *no_regs_list = "80018001,e1918001,8001a001,fc0d0000"; +unsigned long ax88796c_no_regs_mask[AX88796C_REGDUMP_LEN / (sizeof(unsigned long) * 8)]; + +module_param(msg_enable, int, 0444); +MODULE_PARM_DESC(msg_enable, "Message mask (see linux/netdevice.h for bitmap)"); + +static int ax88796c_soft_reset(struct ax88796c_device *ax_local) +{ + u16 temp; + int ret; + + lockdep_assert_held(&ax_local->spi_lock); + + AX_WRITE(&ax_local->ax_spi, PSR_RESET, P0_PSR); + AX_WRITE(&ax_local->ax_spi, PSR_RESET_CLR, P0_PSR); + + ret = read_poll_timeout(AX_READ, ret, + (ret & PSR_DEV_READY), + 0, jiffies_to_usecs(160 * HZ / 1000), false, + &ax_local->ax_spi, P0_PSR); + if (ret) + return ret; + + temp = AX_READ(&ax_local->ax_spi, P4_SPICR); + if (ax_local->priv_flags & AX_CAP_COMP) { + AX_WRITE(&ax_local->ax_spi, + (temp | SPICR_RCEN | SPICR_QCEN), P4_SPICR); + ax_local->ax_spi.comp = 1; + } else { + AX_WRITE(&ax_local->ax_spi, + (temp & ~(SPICR_RCEN | SPICR_QCEN)), P4_SPICR); + ax_local->ax_spi.comp = 0; + } + + return 0; +} + +static int ax88796c_reload_eeprom(struct ax88796c_device *ax_local) +{ + int ret; + + lockdep_assert_held(&ax_local->spi_lock); + + AX_WRITE(&ax_local->ax_spi, EECR_RELOAD, P3_EECR); + + ret = read_poll_timeout(AX_READ, ret, + (ret & PSR_DEV_READY), + 0, jiffies_to_usecs(2 * HZ / 1000), false, + &ax_local->ax_spi, P0_PSR); + if (ret) { + dev_err(&ax_local->spi->dev, + "timeout waiting for reload eeprom\n"); + return ret; + } + + return 0; +} + +static void ax88796c_set_hw_multicast(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + int mc_count = netdev_mc_count(ndev); + u16 rx_ctl = RXCR_AB; + + lockdep_assert_held(&ax_local->spi_lock); + + memset(ax_local->multi_filter, 0, AX_MCAST_FILTER_SIZE); + + if (ndev->flags & IFF_PROMISC) { + rx_ctl |= RXCR_PRO; + + } else if (ndev->flags & IFF_ALLMULTI || mc_count > AX_MAX_MCAST) { + rx_ctl |= RXCR_AMALL; + + } else if (mc_count == 0) { + /* just broadcast and directed */ + } else { + u32 crc_bits; + int i; + struct netdev_hw_addr *ha; + + netdev_for_each_mc_addr(ha, ndev) { + crc_bits = ether_crc(ETH_ALEN, ha->addr); + ax_local->multi_filter[crc_bits >> 29] |= + (1 << ((crc_bits >> 26) & 7)); + } + + for (i = 0; i < 4; i++) { + AX_WRITE(&ax_local->ax_spi, + ((ax_local->multi_filter[i * 2 + 1] << 8) | + ax_local->multi_filter[i * 2]), P3_MFAR(i)); + } + } + + AX_WRITE(&ax_local->ax_spi, rx_ctl, P2_RXCR); +} + +static void ax88796c_set_mac_addr(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + lockdep_assert_held(&ax_local->spi_lock); + + AX_WRITE(&ax_local->ax_spi, ((u16)(ndev->dev_addr[4] << 8) | + (u16)ndev->dev_addr[5]), P3_MACASR0); + AX_WRITE(&ax_local->ax_spi, ((u16)(ndev->dev_addr[2] << 8) | + (u16)ndev->dev_addr[3]), P3_MACASR1); + AX_WRITE(&ax_local->ax_spi, ((u16)(ndev->dev_addr[0] << 8) | + (u16)ndev->dev_addr[1]), P3_MACASR2); +} + +static void ax88796c_load_mac_addr(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + u16 temp; + + lockdep_assert_held(&ax_local->spi_lock); + + /* Try the device tree first */ + if (!eth_platform_get_mac_address(&ax_local->spi->dev, ndev->dev_addr) && + is_valid_ether_addr(ndev->dev_addr)) { + if (netif_msg_probe(ax_local)) + dev_info(&ax_local->spi->dev, + "MAC address read from device tree\n"); + return; + } + + /* Read the MAC address from AX88796C */ + temp = AX_READ(&ax_local->ax_spi, P3_MACASR0); + ndev->dev_addr[5] = (u8)temp; + ndev->dev_addr[4] = (u8)(temp >> 8); + + temp = AX_READ(&ax_local->ax_spi, P3_MACASR1); + ndev->dev_addr[3] = (u8)temp; + ndev->dev_addr[2] = (u8)(temp >> 8); + + temp = AX_READ(&ax_local->ax_spi, P3_MACASR2); + ndev->dev_addr[1] = (u8)temp; + ndev->dev_addr[0] = (u8)(temp >> 8); + + if (is_valid_ether_addr(ndev->dev_addr)) { + if (netif_msg_probe(ax_local)) + dev_info(&ax_local->spi->dev, + "MAC address read from ASIX chip\n"); + return; + } + + /* Use random address if none found */ + if (netif_msg_probe(ax_local)) + dev_info(&ax_local->spi->dev, "Use random MAC address\n"); + eth_hw_addr_random(ndev); +} + +static void ax88796c_proc_tx_hdr(struct tx_pkt_info *info, u8 ip_summed) +{ + u16 pkt_len_bar = (~info->pkt_len & TX_HDR_SOP_PKTLENBAR); + + /* Prepare SOP header */ + info->sop.flags_len = info->pkt_len | + ((ip_summed == CHECKSUM_NONE) || + (ip_summed == CHECKSUM_UNNECESSARY) ? TX_HDR_SOP_DICF : 0); + + info->sop.seq_lenbar = ((info->seq_num << 11) & TX_HDR_SOP_SEQNUM) + | pkt_len_bar; + cpu_to_be16s(&info->sop.flags_len); + cpu_to_be16s(&info->sop.seq_lenbar); + + /* Prepare Segment header */ + info->seg.flags_seqnum_seglen = TX_HDR_SEG_FS | TX_HDR_SEG_LS + | info->pkt_len; + + info->seg.eo_so_seglenbar = pkt_len_bar; + + cpu_to_be16s(&info->seg.flags_seqnum_seglen); + cpu_to_be16s(&info->seg.eo_so_seglenbar); + + /* Prepare EOP header */ + info->eop.seq_len = ((info->seq_num << 11) & + TX_HDR_EOP_SEQNUM) | info->pkt_len; + info->eop.seqbar_lenbar = ((~info->seq_num << 11) & + TX_HDR_EOP_SEQNUMBAR) | pkt_len_bar; + + cpu_to_be16s(&info->eop.seq_len); + cpu_to_be16s(&info->eop.seqbar_lenbar); +} + +static int +ax88796c_check_free_pages(struct ax88796c_device *ax_local, u8 need_pages) +{ + u8 free_pages; + u16 tmp; + + lockdep_assert_held(&ax_local->spi_lock); + + free_pages = AX_READ(&ax_local->ax_spi, P0_TFBFCR) & TX_FREEBUF_MASK; + if (free_pages < need_pages) { + /* schedule free page interrupt */ + tmp = AX_READ(&ax_local->ax_spi, P0_TFBFCR) + & TFBFCR_SCHE_FREE_PAGE; + AX_WRITE(&ax_local->ax_spi, tmp | TFBFCR_TX_PAGE_SET | + TFBFCR_SET_FREE_PAGE(need_pages), + P0_TFBFCR); + return -ENOMEM; + } + + return 0; +} + +static struct sk_buff * +ax88796c_tx_fixup(struct net_device *ndev, struct sk_buff_head *q) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + u8 spi_len = ax_local->ax_spi.comp ? 1 : 4; + struct sk_buff *skb; + struct tx_pkt_info info; + struct skb_data *entry; + u16 pkt_len; + u8 padlen, seq_num; + u8 need_pages; + int headroom; + int tailroom; + + if (skb_queue_empty(q)) + return NULL; + + skb = skb_peek(q); + pkt_len = skb->len; + need_pages = (pkt_len + TX_OVERHEAD + 127) >> 7; + if (ax88796c_check_free_pages(ax_local, need_pages) != 0) + return NULL; + + headroom = skb_headroom(skb); + tailroom = skb_tailroom(skb); + padlen = round_up(pkt_len, 4) - pkt_len; + seq_num = ++ax_local->seq_num & 0x1F; + + info.pkt_len = pkt_len; + + if (skb_cloned(skb) || + (headroom < (TX_OVERHEAD + spi_len)) || + (tailroom < (padlen + TX_EOP_SIZE))) { + size_t h = max((TX_OVERHEAD + spi_len) - headroom, 0); + size_t t = max((padlen + TX_EOP_SIZE) - tailroom, 0); + + if (pskb_expand_head(skb, h, t, GFP_KERNEL)) + return NULL; + } + + info.seq_num = seq_num; + ax88796c_proc_tx_hdr(&info, skb->ip_summed); + + /* SOP and SEG header */ + memcpy(skb_push(skb, TX_OVERHEAD), &info.sop, TX_OVERHEAD); + + /* Write SPI TXQ header */ + memcpy(skb_push(skb, spi_len), ax88796c_tx_cmd_buf, spi_len); + + /* Make 32-bit alignment */ + skb_put(skb, padlen); + + /* EOP header */ + memcpy(skb_put(skb, TX_EOP_SIZE), &info.eop, TX_EOP_SIZE); + + skb_unlink(skb, q); + + entry = (struct skb_data *)skb->cb; + memset(entry, 0, sizeof(*entry)); + entry->len = pkt_len; + + if (netif_msg_pktdata(ax_local)) { + char pfx[IFNAMSIZ + 7]; + + snprintf(pfx, sizeof(pfx), "%s: ", ndev->name); + + netdev_info(ndev, "TX packet len %d, total len %d, seq %d\n", + pkt_len, skb->len, seq_num); + + netdev_info(ndev, " SPI Header:\n"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + skb->data, 4, 0); + + netdev_info(ndev, " TX SOP:\n"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + skb->data + 4, TX_OVERHEAD, 0); + + netdev_info(ndev, " TX packet:\n"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + skb->data + 4 + TX_OVERHEAD, + skb->len - TX_EOP_SIZE - 4 - TX_OVERHEAD, 0); + + netdev_info(ndev, " TX EOP:\n"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + skb->data + skb->len - 4, 4, 0); + } + + return skb; +} + +static int ax88796c_hard_xmit(struct ax88796c_device *ax_local) +{ + struct ax88796c_pcpu_stats *stats; + struct sk_buff *tx_skb; + struct skb_data *entry; + unsigned long flags; + + lockdep_assert_held(&ax_local->spi_lock); + + stats = this_cpu_ptr(ax_local->stats); + tx_skb = ax88796c_tx_fixup(ax_local->ndev, &ax_local->tx_wait_q); + + if (!tx_skb) { + this_cpu_inc(ax_local->stats->tx_dropped); + return 0; + } + entry = (struct skb_data *)tx_skb->cb; + + AX_WRITE(&ax_local->ax_spi, + (TSNR_TXB_START | TSNR_PKT_CNT(1)), P0_TSNR); + + axspi_write_txq(&ax_local->ax_spi, tx_skb->data, tx_skb->len); + + if (((AX_READ(&ax_local->ax_spi, P0_TSNR) & TXNR_TXB_IDLE) == 0) || + ((ISR_TXERR & AX_READ(&ax_local->ax_spi, P0_ISR)) != 0)) { + /* Ack tx error int */ + AX_WRITE(&ax_local->ax_spi, ISR_TXERR, P0_ISR); + + this_cpu_inc(ax_local->stats->tx_dropped); + + if (net_ratelimit()) + netif_err(ax_local, tx_err, ax_local->ndev, + "TX FIFO error, re-initialize the TX bridge\n"); + + /* Reinitial tx bridge */ + AX_WRITE(&ax_local->ax_spi, TXNR_TXB_REINIT | + AX_READ(&ax_local->ax_spi, P0_TSNR), P0_TSNR); + ax_local->seq_num = 0; + } else { + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, entry->len); + u64_stats_update_end_irqrestore(&stats->syncp, flags); + } + + entry->state = tx_done; + dev_kfree_skb(tx_skb); + + return 1; +} + +static int +ax88796c_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + skb_queue_tail(&ax_local->tx_wait_q, skb); + if (skb_queue_len(&ax_local->tx_wait_q) > TX_QUEUE_HIGH_WATER) + netif_stop_queue(ndev); + + set_bit(EVENT_TX, &ax_local->flags); + schedule_work(&ax_local->ax_work); + + return NETDEV_TX_OK; +} + +static void +ax88796c_skb_return(struct ax88796c_device *ax_local, + struct sk_buff *skb, struct rx_header *rxhdr) +{ + struct net_device *ndev = ax_local->ndev; + struct ax88796c_pcpu_stats *stats; + unsigned long flags; + int status; + + stats = this_cpu_ptr(ax_local->stats); + + do { + if (!(ndev->features & NETIF_F_RXCSUM)) + break; + + /* checksum error bit is set */ + if ((rxhdr->flags & RX_HDR3_L3_ERR) || + (rxhdr->flags & RX_HDR3_L4_ERR)) + break; + + /* Other types may be indicated by more than one bit. */ + if ((rxhdr->flags & RX_HDR3_L4_TYPE_TCP) || + (rxhdr->flags & RX_HDR3_L4_TYPE_UDP)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } while (0); + + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->rx_packets); + u64_stats_add(&stats->rx_bytes, skb->len); + u64_stats_update_end_irqrestore(&stats->syncp, flags); + + skb->dev = ndev; + skb->protocol = eth_type_trans(skb, ax_local->ndev); + + netif_info(ax_local, rx_status, ndev, "< rx, len %zu, type 0x%x\n", + skb->len + sizeof(struct ethhdr), skb->protocol); + + status = netif_rx_ni(skb); + if (status != NET_RX_SUCCESS && net_ratelimit()) + netif_info(ax_local, rx_err, ndev, + "netif_rx status %d\n", status); +} + +static void +ax88796c_rx_fixup(struct ax88796c_device *ax_local, struct sk_buff *rx_skb) +{ + struct rx_header *rxhdr = (struct rx_header *)rx_skb->data; + struct net_device *ndev = ax_local->ndev; + u16 len; + + be16_to_cpus(&rxhdr->flags_len); + be16_to_cpus(&rxhdr->seq_lenbar); + be16_to_cpus(&rxhdr->flags); + + if ((rxhdr->flags_len & RX_HDR1_PKT_LEN) != + (~rxhdr->seq_lenbar & 0x7FF)) { + netif_err(ax_local, rx_err, ndev, "Header error\n"); + + this_cpu_inc(ax_local->stats->rx_frame_errors); + kfree_skb(rx_skb); + return; + } + + if ((rxhdr->flags_len & RX_HDR1_MII_ERR) || + (rxhdr->flags_len & RX_HDR1_CRC_ERR)) { + netif_err(ax_local, rx_err, ndev, "CRC or MII error\n"); + + this_cpu_inc(ax_local->stats->rx_crc_errors); + kfree_skb(rx_skb); + return; + } + + len = rxhdr->flags_len & RX_HDR1_PKT_LEN; + if (netif_msg_pktdata(ax_local)) { + char pfx[IFNAMSIZ + 7]; + + snprintf(pfx, sizeof(pfx), "%s: ", ndev->name); + netdev_info(ndev, "RX data, total len %d, packet len %d\n", + rx_skb->len, len); + + netdev_info(ndev, " Dump RX packet header:"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + rx_skb->data, sizeof(*rxhdr), 0); + + netdev_info(ndev, " Dump RX packet:"); + print_hex_dump(KERN_INFO, pfx, DUMP_PREFIX_OFFSET, 16, 1, + rx_skb->data + sizeof(*rxhdr), len, 0); + } + + skb_pull(rx_skb, sizeof(*rxhdr)); + pskb_trim(rx_skb, len); + + ax88796c_skb_return(ax_local, rx_skb, rxhdr); +} + +static int ax88796c_receive(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + struct skb_data *entry; + u16 w_count, pkt_len; + struct sk_buff *skb; + u8 pkt_cnt; + + lockdep_assert_held(&ax_local->spi_lock); + + /* check rx packet and total word count */ + AX_WRITE(&ax_local->ax_spi, AX_READ(&ax_local->ax_spi, P0_RTWCR) + | RTWCR_RX_LATCH, P0_RTWCR); + + pkt_cnt = AX_READ(&ax_local->ax_spi, P0_RXBCR2) & RXBCR2_PKT_MASK; + if (!pkt_cnt) + return 0; + + pkt_len = AX_READ(&ax_local->ax_spi, P0_RCPHR) & 0x7FF; + + w_count = round_up(pkt_len + 6, 4) >> 1; + + skb = netdev_alloc_skb(ndev, w_count * 2); + if (!skb) { + AX_WRITE(&ax_local->ax_spi, RXBCR1_RXB_DISCARD, P0_RXBCR1); + this_cpu_inc(ax_local->stats->rx_dropped); + return 0; + } + entry = (struct skb_data *)skb->cb; + + AX_WRITE(&ax_local->ax_spi, RXBCR1_RXB_START | w_count, P0_RXBCR1); + + axspi_read_rxq(&ax_local->ax_spi, + skb_put(skb, w_count * 2), skb->len); + + /* Check if rx bridge is idle */ + if ((AX_READ(&ax_local->ax_spi, P0_RXBCR2) & RXBCR2_RXB_IDLE) == 0) { + if (net_ratelimit()) + netif_err(ax_local, rx_err, ndev, + "Rx Bridge is not idle\n"); + AX_WRITE(&ax_local->ax_spi, RXBCR2_RXB_REINIT, P0_RXBCR2); + + entry->state = rx_err; + } else { + entry->state = rx_done; + } + + AX_WRITE(&ax_local->ax_spi, ISR_RXPKT, P0_ISR); + + ax88796c_rx_fixup(ax_local, skb); + + return 1; +} + +static int ax88796c_process_isr(struct ax88796c_device *ax_local) +{ + struct net_device *ndev = ax_local->ndev; + int todo = 0; + u16 isr; + + lockdep_assert_held(&ax_local->spi_lock); + + isr = AX_READ(&ax_local->ax_spi, P0_ISR); + AX_WRITE(&ax_local->ax_spi, isr, P0_ISR); + + netif_dbg(ax_local, intr, ndev, " ISR 0x%04x\n", isr); + + if (isr & ISR_TXERR) { + netif_dbg(ax_local, intr, ndev, " TXERR interrupt\n"); + AX_WRITE(&ax_local->ax_spi, TXNR_TXB_REINIT, P0_TSNR); + ax_local->seq_num = 0x1f; + } + + if (isr & ISR_TXPAGES) { + netif_dbg(ax_local, intr, ndev, " TXPAGES interrupt\n"); + set_bit(EVENT_TX, &ax_local->flags); + } + + if (isr & ISR_LINK) { + netif_dbg(ax_local, intr, ndev, " Link change interrupt\n"); + phy_mac_interrupt(ax_local->ndev->phydev); + } + + if (isr & ISR_RXPKT) { + netif_dbg(ax_local, intr, ndev, " RX interrupt\n"); + todo = ax88796c_receive(ax_local->ndev); + } + + return todo; +} + +static irqreturn_t ax88796c_interrupt(int irq, void *dev_instance) +{ + struct ax88796c_device *ax_local; + struct net_device *ndev; + + ndev = dev_instance; + if (!ndev) { + pr_err("irq %d for unknown device.\n", irq); + return IRQ_RETVAL(0); + } + ax_local = to_ax88796c_device(ndev); + + disable_irq_nosync(irq); + + netif_dbg(ax_local, intr, ndev, "Interrupt occurred\n"); + + set_bit(EVENT_INTR, &ax_local->flags); + schedule_work(&ax_local->ax_work); + + return IRQ_HANDLED; +} + +static void ax88796c_work(struct work_struct *work) +{ + struct ax88796c_device *ax_local = + container_of(work, struct ax88796c_device, ax_work); + + mutex_lock(&ax_local->spi_lock); + + if (test_bit(EVENT_SET_MULTI, &ax_local->flags)) { + ax88796c_set_hw_multicast(ax_local->ndev); + clear_bit(EVENT_SET_MULTI, &ax_local->flags); + } + + if (test_bit(EVENT_INTR, &ax_local->flags)) { + AX_WRITE(&ax_local->ax_spi, IMR_MASKALL, P0_IMR); + + while (ax88796c_process_isr(ax_local)) + /* nothing */; + + clear_bit(EVENT_INTR, &ax_local->flags); + + AX_WRITE(&ax_local->ax_spi, IMR_DEFAULT, P0_IMR); + + enable_irq(ax_local->ndev->irq); + } + + if (test_bit(EVENT_TX, &ax_local->flags)) { + while (skb_queue_len(&ax_local->tx_wait_q)) { + if (!ax88796c_hard_xmit(ax_local)) + break; + } + + clear_bit(EVENT_TX, &ax_local->flags); + + if (netif_queue_stopped(ax_local->ndev) && + (skb_queue_len(&ax_local->tx_wait_q) < TX_QUEUE_LOW_WATER)) + netif_wake_queue(ax_local->ndev); + } + + mutex_unlock(&ax_local->spi_lock); +} + +static void ax88796c_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + u32 rx_frame_errors = 0, rx_crc_errors = 0; + u32 rx_dropped = 0, tx_dropped = 0; + unsigned int start; + int cpu; + + for_each_possible_cpu(cpu) { + struct ax88796c_pcpu_stats *s; + u64 rx_packets, rx_bytes; + u64 tx_packets, tx_bytes; + + s = per_cpu_ptr(ax_local->stats, cpu); + + do { + start = u64_stats_fetch_begin_irq(&s->syncp); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + + rx_dropped += stats->rx_dropped; + tx_dropped += stats->tx_dropped; + rx_frame_errors += stats->rx_frame_errors; + rx_crc_errors += stats->rx_crc_errors; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; + stats->rx_frame_errors = rx_frame_errors; + stats->rx_crc_errors = rx_crc_errors; +} + +static void ax88796c_set_mac(struct ax88796c_device *ax_local) +{ + u16 maccr; + + maccr = (ax_local->link) ? MACCR_RXEN : 0; + + switch (ax_local->speed) { + case SPEED_100: + maccr |= MACCR_SPEED_100; + case SPEED_10: + case SPEED_UNKNOWN: + break; + default: + return; + } + + switch (ax_local->duplex) { + case DUPLEX_FULL: + maccr |= MACCR_SPEED_100; + case DUPLEX_HALF: + case DUPLEX_UNKNOWN: + break; + default: + return; + } + + if (ax_local->flowctrl & AX_FC_ANEG && + ax_local->phydev->autoneg) { + maccr |= ax_local->pause ? MACCR_RXFC_ENABLE : 0; + maccr |= !ax_local->pause != !ax_local->asym_pause ? + MACCR_TXFC_ENABLE : 0; + } else { + maccr |= (ax_local->flowctrl & AX_FC_RX) ? MACCR_RXFC_ENABLE : 0; + maccr |= (ax_local->flowctrl & AX_FC_TX) ? MACCR_TXFC_ENABLE : 0; + } + + mutex_lock(&ax_local->spi_lock); + + maccr |= AX_READ(&ax_local->ax_spi, P0_MACCR) & + ~(MACCR_DUPLEX_FULL | MACCR_SPEED_100 | + MACCR_TXFC_ENABLE | MACCR_RXFC_ENABLE); + AX_WRITE(&ax_local->ax_spi, maccr, P0_MACCR); + + mutex_unlock(&ax_local->spi_lock); +} + +static void ax88796c_handle_link_change(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + struct phy_device *phydev = ndev->phydev; + bool update = false; + + if (phydev->link && (ax_local->speed != phydev->speed || + ax_local->duplex != phydev->duplex || + ax_local->pause != phydev->pause || + ax_local->asym_pause != phydev->asym_pause)) { + ax_local->speed = phydev->speed; + ax_local->duplex = phydev->duplex; + ax_local->pause = phydev->pause; + ax_local->asym_pause = phydev->asym_pause; + update = true; + } + + if (phydev->link != ax_local->link) { + if (!phydev->link) { + ax_local->speed = SPEED_UNKNOWN; + ax_local->duplex = DUPLEX_UNKNOWN; + } + + ax_local->link = phydev->link; + update = true; + } + + if (update) + ax88796c_set_mac(ax_local); + + if (net_ratelimit()) + phy_print_status(ndev->phydev); +} + +static void ax88796c_set_csums(struct ax88796c_device *ax_local) +{ + struct net_device *ndev = ax_local->ndev; + + lockdep_assert_held(&ax_local->spi_lock); + + if (ndev->features & NETIF_F_RXCSUM) { + AX_WRITE(&ax_local->ax_spi, COERCR0_DEFAULT, P4_COERCR0); + AX_WRITE(&ax_local->ax_spi, COERCR1_DEFAULT, P4_COERCR1); + } else { + AX_WRITE(&ax_local->ax_spi, 0, P4_COERCR0); + AX_WRITE(&ax_local->ax_spi, 0, P4_COERCR1); + } + + if (ndev->features & NETIF_F_HW_CSUM) { + AX_WRITE(&ax_local->ax_spi, COETCR0_DEFAULT, P4_COETCR0); + AX_WRITE(&ax_local->ax_spi, COETCR1_TXPPPE, P4_COETCR1); + } else { + AX_WRITE(&ax_local->ax_spi, 0, P4_COETCR0); + AX_WRITE(&ax_local->ax_spi, 0, P4_COETCR1); + } +} + +static int +ax88796c_open(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + unsigned long irq_flag = 0; + int fc = AX_FC_NONE; + int ret; + u16 t; + + ret = request_irq(ndev->irq, ax88796c_interrupt, + irq_flag, ndev->name, ndev); + if (ret) { + netdev_err(ndev, "unable to get IRQ %d (errno=%d).\n", + ndev->irq, ret); + return ret; + } + + mutex_lock(&ax_local->spi_lock); + + ret = ax88796c_soft_reset(ax_local); + if (ret < 0) { + free_irq(ndev->irq, ndev); + mutex_unlock(&ax_local->spi_lock); + return ret; + } + ax_local->seq_num = 0x1f; + + ax88796c_set_mac_addr(ndev); + ax88796c_set_csums(ax_local); + + /* Disable stuffing packet */ + t = AX_READ(&ax_local->ax_spi, P1_RXBSPCR); + t &= ~RXBSPCR_STUF_ENABLE; + AX_WRITE(&ax_local->ax_spi, t, P1_RXBSPCR); + + /* Enable RX packet process */ + AX_WRITE(&ax_local->ax_spi, RPPER_RXEN, P1_RPPER); + + t = AX_READ(&ax_local->ax_spi, P0_FER); + t |= FER_RXEN | FER_TXEN | FER_BSWAP | FER_IRQ_PULL; + AX_WRITE(&ax_local->ax_spi, t, P0_FER); + + /* Setup LED mode */ + AX_WRITE(&ax_local->ax_spi, + (LCR_LED0_EN | LCR_LED0_DUPLEX | LCR_LED1_EN | + LCR_LED1_100MODE), P2_LCR0); + AX_WRITE(&ax_local->ax_spi, + (AX_READ(&ax_local->ax_spi, P2_LCR1) & LCR_LED2_MASK) | + LCR_LED2_EN | LCR_LED2_LINK, P2_LCR1); + + /* Disable PHY auto-polling */ + AX_WRITE(&ax_local->ax_spi, PCR_PHYID(AX88796C_PHY_ID), P2_PCR); + + /* Enable MAC interrupts */ + AX_WRITE(&ax_local->ax_spi, IMR_DEFAULT, P0_IMR); + + mutex_unlock(&ax_local->spi_lock); + + /* Setup flow-control configuration */ + phy_support_asym_pause(ax_local->phydev); + + if (ax_local->phydev->advertising && + (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + ax_local->phydev->advertising) || + linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ax_local->phydev->advertising))) + fc |= AX_FC_ANEG; + + fc |= linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + ax_local->phydev->advertising) ? AX_FC_RX : 0; + fc |= (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + ax_local->phydev->advertising) != + linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ax_local->phydev->advertising)) ? AX_FC_TX : 0; + ax_local->flowctrl = fc; + + phy_start(ax_local->ndev->phydev); + + netif_start_queue(ndev); + + spi_message_init(&ax_local->ax_spi.rx_msg); + + return 0; +} + +static int +ax88796c_close(struct net_device *ndev) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + + phy_stop(ndev->phydev); + + /* We lock the mutex early not only to protect the device + * against concurrent access, but also avoid waking up the + * queue in ax88796c_work(). phy_stop() needs to be called + * before because it locks the mutex to access SPI. + */ + mutex_lock(&ax_local->spi_lock); + + netif_stop_queue(ndev); + + /* No more work can be scheduled now. Make any pending work, + * including one already waiting for the mutex to be unlocked, + * NOP. + */ + netif_dbg(ax_local, ifdown, ndev, "clearing bits\n"); + clear_bit(EVENT_SET_MULTI, &ax_local->flags); + clear_bit(EVENT_INTR, &ax_local->flags); + clear_bit(EVENT_TX, &ax_local->flags); + + /* Disable MAC interrupts */ + AX_WRITE(&ax_local->ax_spi, IMR_MASKALL, P0_IMR); + __skb_queue_purge(&ax_local->tx_wait_q); + ax88796c_soft_reset(ax_local); + + mutex_unlock(&ax_local->spi_lock); + + cancel_work_sync(&ax_local->ax_work); + + free_irq(ndev->irq, ndev); + + return 0; +} + +static int +ax88796c_set_features(struct net_device *ndev, netdev_features_t features) +{ + struct ax88796c_device *ax_local = to_ax88796c_device(ndev); + netdev_features_t changed = features ^ ndev->features; + + if (!(changed & (NETIF_F_RXCSUM | NETIF_F_HW_CSUM))) + return 0; + + ndev->features = features; + + if (changed & (NETIF_F_RXCSUM | NETIF_F_HW_CSUM)) + ax88796c_set_csums(ax_local); + + return 0; +} + +static const struct net_device_ops ax88796c_netdev_ops = { + .ndo_open = ax88796c_open, + .ndo_stop = ax88796c_close, + .ndo_start_xmit = ax88796c_start_xmit, + .ndo_get_stats64 = ax88796c_get_stats64, + .ndo_do_ioctl = ax88796c_ioctl, + .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = ax88796c_set_features, +}; + +static int ax88796c_hard_reset(struct ax88796c_device *ax_local) +{ + struct device *dev = (struct device *)&ax_local->spi->dev; + struct gpio_desc *reset_gpio; + + /* reset info */ + reset_gpio = gpiod_get(dev, "reset", 0); + if (IS_ERR(reset_gpio)) { + dev_err(dev, "Could not get 'reset' GPIO: %ld", PTR_ERR(reset_gpio)); + return PTR_ERR(reset_gpio); + } + + /* set reset */ + gpiod_direction_output(reset_gpio, 1); + msleep(100); + gpiod_direction_output(reset_gpio, 0); + gpiod_put(reset_gpio); + msleep(20); + + return 0; +} + +static int ax88796c_probe(struct spi_device *spi) +{ + char phy_id[MII_BUS_ID_SIZE + 3]; + struct ax88796c_device *ax_local; + struct net_device *ndev; + u16 temp; + int ret; + + ndev = devm_alloc_etherdev(&spi->dev, sizeof(*ax_local)); + if (!ndev) + return -ENOMEM; + + SET_NETDEV_DEV(ndev, &spi->dev); + + ax_local = to_ax88796c_device(ndev); + + dev_set_drvdata(&spi->dev, ax_local); + ax_local->spi = spi; + ax_local->ax_spi.spi = spi; + + ax_local->stats = + devm_netdev_alloc_pcpu_stats(&spi->dev, + struct ax88796c_pcpu_stats); + if (!ax_local->stats) + return -ENOMEM; + + ax_local->ndev = ndev; + ax_local->priv_flags |= comp ? AX_CAP_COMP : 0; + ax_local->msg_enable = msg_enable; + mutex_init(&ax_local->spi_lock); + + ax_local->mdiobus = devm_mdiobus_alloc(&spi->dev); + if (!ax_local->mdiobus) + return -ENOMEM; + + ax_local->mdiobus->priv = ax_local; + ax_local->mdiobus->read = ax88796c_mdio_read; + ax_local->mdiobus->write = ax88796c_mdio_write; + ax_local->mdiobus->name = "ax88976c-mdiobus"; + ax_local->mdiobus->phy_mask = (u32)~BIT(AX88796C_PHY_ID); + ax_local->mdiobus->parent = &spi->dev; + + snprintf(ax_local->mdiobus->id, MII_BUS_ID_SIZE, + "ax88796c-%s.%u", dev_name(&spi->dev), spi->chip_select); + + ret = devm_mdiobus_register(&spi->dev, ax_local->mdiobus); + if (ret < 0) { + dev_err(&spi->dev, "Could not register MDIO bus\n"); + return ret; + } + + if (netif_msg_probe(ax_local)) { + dev_info(&spi->dev, "AX88796C-SPI Configuration:\n"); + dev_info(&spi->dev, " Compression : %s\n", + ax_local->priv_flags & AX_CAP_COMP ? "ON" : "OFF"); + } + + ndev->irq = spi->irq; + ndev->netdev_ops = &ax88796c_netdev_ops; + ndev->ethtool_ops = &ax88796c_ethtool_ops; + ndev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + ndev->features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + ndev->needed_headroom = TX_OVERHEAD; + ndev->needed_tailroom = TX_EOP_SIZE; + + mutex_lock(&ax_local->spi_lock); + + /* ax88796c gpio reset */ + ax88796c_hard_reset(ax_local); + + /* Reset AX88796C */ + ret = ax88796c_soft_reset(ax_local); + if (ret < 0) { + ret = -ENODEV; + mutex_unlock(&ax_local->spi_lock); + goto err; + } + /* Check board revision */ + temp = AX_READ(&ax_local->ax_spi, P2_CRIR); + if ((temp & 0xF) != 0x0) { + dev_err(&spi->dev, "spi read failed: %d\n", temp); + ret = -ENODEV; + mutex_unlock(&ax_local->spi_lock); + goto err; + } + + /*Reload EEPROM*/ + ax88796c_reload_eeprom(ax_local); + + ax88796c_load_mac_addr(ndev); + + if (netif_msg_probe(ax_local)) + dev_info(&spi->dev, + "irq %d, MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n", + ndev->irq, + ndev->dev_addr[0], ndev->dev_addr[1], + ndev->dev_addr[2], ndev->dev_addr[3], + ndev->dev_addr[4], ndev->dev_addr[5]); + + /* Disable power saving */ + AX_WRITE(&ax_local->ax_spi, (AX_READ(&ax_local->ax_spi, P0_PSCR) + & PSCR_PS_MASK) | PSCR_PS_D0, P0_PSCR); + + mutex_unlock(&ax_local->spi_lock); + + INIT_WORK(&ax_local->ax_work, ax88796c_work); + + skb_queue_head_init(&ax_local->tx_wait_q); + + snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, + ax_local->mdiobus->id, AX88796C_PHY_ID); + ax_local->phydev = phy_connect(ax_local->ndev, phy_id, + ax88796c_handle_link_change, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(ax_local->phydev)) { + ret = PTR_ERR(ax_local->phydev); + goto err; + } + ax_local->phydev->irq = PHY_POLL; + + ret = devm_register_netdev(&spi->dev, ndev); + if (ret) { + dev_err(&spi->dev, "failed to register a network device\n"); + goto err_phy_dis; + } + + netif_info(ax_local, probe, ndev, "%s %s registered\n", + dev_driver_string(&spi->dev), + dev_name(&spi->dev)); + phy_attached_info(ax_local->phydev); + + return 0; + +err_phy_dis: + phy_disconnect(ax_local->phydev); +err: + return ret; +} + +static int ax88796c_remove(struct spi_device *spi) +{ + struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev); + struct net_device *ndev = ax_local->ndev; + + phy_disconnect(ndev->phydev); + + netif_info(ax_local, probe, ndev, "removing network device %s %s\n", + dev_driver_string(&spi->dev), + dev_name(&spi->dev)); + + return 0; +} + +static const struct of_device_id ax88796c_dt_ids[] = { + { .compatible = "asix,ax88796c" }, + {}, +}; +MODULE_DEVICE_TABLE(of, ax88796c_dt_ids); + +static const struct spi_device_id asix_id[] = { + { "ax88796c", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, asix_id); + +static struct spi_driver ax88796c_spi_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = of_match_ptr(ax88796c_dt_ids), + }, + .probe = ax88796c_probe, + .remove = ax88796c_remove, + .id_table = asix_id, +}; + +static __init int ax88796c_spi_init(void) +{ + int ret; + + bitmap_zero(ax88796c_no_regs_mask, AX88796C_REGDUMP_LEN); + ret = bitmap_parse(no_regs_list, 35, + ax88796c_no_regs_mask, AX88796C_REGDUMP_LEN); + if (ret) { + bitmap_fill(ax88796c_no_regs_mask, AX88796C_REGDUMP_LEN); + pr_err("Invalid bitmap description, masking all registers\n"); + } + + return spi_register_driver(&ax88796c_spi_driver); +} + +static __exit void ax88796c_spi_exit(void) +{ + spi_unregister_driver(&ax88796c_spi_driver); +} + +module_init(ax88796c_spi_init); +module_exit(ax88796c_spi_exit); + +MODULE_AUTHOR("Łukasz Stelmach "); +MODULE_DESCRIPTION("ASIX AX88796C SPI Ethernet driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/asix/ax88796c_main.h b/drivers/net/ethernet/asix/ax88796c_main.h new file mode 100644 index 000000000000..80263c3cef75 --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_main.h @@ -0,0 +1,568 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#ifndef _AX88796C_MAIN_H +#define _AX88796C_MAIN_H + +#include +#include + +#include "ax88796c_spi.h" + +/* These identify the driver base version and may not be removed. */ +#define DRV_NAME "ax88796c" +#define ADP_NAME "ASIX AX88796C SPI Ethernet Adapter" + +#define TX_QUEUE_HIGH_WATER 45 /* Tx queue high water mark */ +#define TX_QUEUE_LOW_WATER 20 /* Tx queue low water mark */ + +#define AX88796C_REGDUMP_LEN 256 +#define AX88796C_PHY_REGDUMP_LEN 14 +#define AX88796C_PHY_ID 0x10 + +#define TX_OVERHEAD 8 +#define TX_EOP_SIZE 4 + +#define AX_MCAST_FILTER_SIZE 8 +#define AX_MAX_MCAST 64 +#define AX_MAX_CLK 80000000 +#define TX_HDR_SOP_DICF 0x8000 +#define TX_HDR_SOP_CPHI 0x4000 +#define TX_HDR_SOP_INT 0x2000 +#define TX_HDR_SOP_MDEQ 0x1000 +#define TX_HDR_SOP_PKTLEN 0x07FF +#define TX_HDR_SOP_SEQNUM 0xF800 +#define TX_HDR_SOP_PKTLENBAR 0x07FF + +#define TX_HDR_SEG_FS 0x8000 +#define TX_HDR_SEG_LS 0x4000 +#define TX_HDR_SEG_SEGNUM 0x3800 +#define TX_HDR_SEG_SEGLEN 0x0700 +#define TX_HDR_SEG_EOFST 0xC000 +#define TX_HDR_SEG_SOFST 0x3800 +#define TX_HDR_SEG_SEGLENBAR 0x07FF + +#define TX_HDR_EOP_SEQNUM 0xF800 +#define TX_HDR_EOP_PKTLEN 0x07FF +#define TX_HDR_EOP_SEQNUMBAR 0xF800 +#define TX_HDR_EOP_PKTLENBAR 0x07FF + +/* Rx header fields mask */ +#define RX_HDR1_MCBC 0x8000 +#define RX_HDR1_STUFF_PKT 0x4000 +#define RX_HDR1_MII_ERR 0x2000 +#define RX_HDR1_CRC_ERR 0x1000 +#define RX_HDR1_PKT_LEN 0x07FF + +#define RX_HDR2_SEQ_NUM 0xF800 +#define RX_HDR2_PKT_LEN_BAR 0x7FFF + +#define RX_HDR3_PE 0x8000 +#define RX_HDR3_L3_TYPE_IPV4V6 0x6000 +#define RX_HDR3_L3_TYPE_IP 0x4000 +#define RX_HDR3_L3_TYPE_IPV6 0x2000 +#define RX_HDR3_L4_TYPE_ICMPV6 0x1400 +#define RX_HDR3_L4_TYPE_TCP 0x1000 +#define RX_HDR3_L4_TYPE_IGMP 0x0c00 +#define RX_HDR3_L4_TYPE_ICMP 0x0800 +#define RX_HDR3_L4_TYPE_UDP 0x0400 +#define RX_HDR3_L3_ERR 0x0200 +#define RX_HDR3_L4_ERR 0x0100 +#define RX_HDR3_PRIORITY(x) ((x) << 4) +#define RX_HDR3_STRIP 0x0008 +#define RX_HDR3_VLAN_ID 0x0007 + +struct ax88796c_pcpu_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + struct u64_stats_sync syncp; + u32 rx_dropped; + u32 tx_dropped; + u32 rx_frame_errors; + u32 rx_crc_errors; +}; + +struct ax88796c_device { + struct spi_device *spi; + struct net_device *ndev; + struct ax88796c_pcpu_stats __percpu *stats; + + struct work_struct ax_work; + + struct mutex spi_lock; /* device access */ + + struct sk_buff_head tx_wait_q; + + struct axspi_data ax_spi; + + struct mii_bus *mdiobus; + struct phy_device *phydev; + + int msg_enable; + + u16 seq_num; + + u8 multi_filter[AX_MCAST_FILTER_SIZE]; + + int link; + int speed; + int duplex; + int pause; + int asym_pause; + int flowctrl; + #define AX_FC_NONE 0 + #define AX_FC_RX BIT(0) + #define AX_FC_TX BIT(1) + #define AX_FC_ANEG BIT(2) + + u32 priv_flags; + #define AX_CAP_COMP BIT(0) + #define AX_PRIV_FLAGS_MASK (AX_CAP_COMP) + + unsigned long flags; + #define EVENT_INTR BIT(0) + #define EVENT_TX BIT(1) + #define EVENT_SET_MULTI BIT(2) + +}; + +#define to_ax88796c_device(ndev) ((struct ax88796c_device *)netdev_priv(ndev)) + +enum skb_state { + illegal = 0, + tx_done, + rx_done, + rx_err, +}; + +struct skb_data { + enum skb_state state; + size_t len; +}; + +/* A88796C register definition */ + /* Definition of PAGE0 */ +#define P0_PSR (0x00) + #define PSR_DEV_READY BIT(7) + #define PSR_RESET (0 << 15) + #define PSR_RESET_CLR BIT(15) +#define P0_BOR (0x02) +#define P0_FER (0x04) + #define FER_IPALM BIT(0) + #define FER_DCRC BIT(1) + #define FER_RH3M BIT(2) + #define FER_HEADERSWAP BIT(7) + #define FER_WSWAP BIT(8) + #define FER_BSWAP BIT(9) + #define FER_INTHI BIT(10) + #define FER_INTLO (0 << 10) + #define FER_IRQ_PULL BIT(11) + #define FER_RXEN BIT(14) + #define FER_TXEN BIT(15) +#define P0_ISR (0x06) + #define ISR_RXPKT BIT(0) + #define ISR_MDQ BIT(4) + #define ISR_TXT BIT(5) + #define ISR_TXPAGES BIT(6) + #define ISR_TXERR BIT(8) + #define ISR_LINK BIT(9) +#define P0_IMR (0x08) + #define IMR_RXPKT BIT(0) + #define IMR_MDQ BIT(4) + #define IMR_TXT BIT(5) + #define IMR_TXPAGES BIT(6) + #define IMR_TXERR BIT(8) + #define IMR_LINK BIT(9) + #define IMR_MASKALL (0xFFFF) + #define IMR_DEFAULT (IMR_TXERR) +#define P0_WFCR (0x0A) + #define WFCR_PMEIND BIT(0) /* PME indication */ + #define WFCR_PMETYPE BIT(1) /* PME I/O type */ + #define WFCR_PMEPOL BIT(2) /* PME polarity */ + #define WFCR_PMERST BIT(3) /* Reset PME */ + #define WFCR_SLEEP BIT(4) /* Enable sleep mode */ + #define WFCR_WAKEUP BIT(5) /* Enable wakeup mode */ + #define WFCR_WAITEVENT BIT(6) /* Reserved */ + #define WFCR_CLRWAKE BIT(7) /* Clear wakeup */ + #define WFCR_LINKCH BIT(8) /* Enable link change */ + #define WFCR_MAGICP BIT(9) /* Enable magic packet */ + #define WFCR_WAKEF BIT(10) /* Enable wakeup frame */ + #define WFCR_PMEEN BIT(11) /* Enable PME pin */ + #define WFCR_LINKCHS BIT(12) /* Link change status */ + #define WFCR_MAGICPS BIT(13) /* Magic packet status */ + #define WFCR_WAKEFS BIT(14) /* Wakeup frame status */ + #define WFCR_PMES BIT(15) /* PME pin status */ +#define P0_PSCR (0x0C) + #define PSCR_PS_MASK (0xFFF0) + #define PSCR_PS_D0 (0) + #define PSCR_PS_D1 BIT(0) + #define PSCR_PS_D2 BIT(1) + #define PSCR_FPS BIT(3) /* Enable fiber mode PS */ + #define PSCR_SWPS BIT(4) /* Enable software */ + /* PS control */ + #define PSCR_WOLPS BIT(5) /* Enable WOL PS */ + #define PSCR_SWWOL BIT(6) /* Enable software select */ + /* WOL PS */ + #define PSCR_PHYOSC BIT(7) /* Internal PHY OSC control */ + #define PSCR_FOFEF BIT(8) /* Force PHY generate FEF */ + #define PSCR_FOF BIT(9) /* Force PHY in fiber mode */ + #define PSCR_PHYPD BIT(10) /* PHY power down. */ + /* Active high */ + #define PSCR_PHYRST BIT(11) /* PHY reset signal. */ + /* Active low */ + #define PSCR_PHYCSIL BIT(12) /* PHY cable energy detect */ + #define PSCR_PHYCOFF BIT(13) /* PHY cable off */ + #define PSCR_PHYLINK BIT(14) /* PHY link status */ + #define PSCR_EEPOK BIT(15) /* EEPROM load complete */ +#define P0_MACCR (0x0E) + #define MACCR_RXEN BIT(0) /* Enable RX */ + #define MACCR_DUPLEX_FULL BIT(1) /* 1: Full, 0: Half */ + #define MACCR_SPEED_100 BIT(2) /* 1: 100Mbps, 0: 10Mbps */ + #define MACCR_RXFC_ENABLE BIT(3) + #define MACCR_RXFC_MASK 0xFFF7 + #define MACCR_TXFC_ENABLE BIT(4) + #define MACCR_TXFC_MASK 0xFFEF + #define MACCR_PSI BIT(6) /* Software Cable-Off */ + /* Power Saving Interrupt */ + #define MACCR_PF BIT(7) + #define MACCR_PMM_BITS 8 + #define MACCR_PMM_MASK (0x1F00) + #define MACCR_PMM_RESET BIT(8) + #define MACCR_PMM_WAIT (2 << 8) + #define MACCR_PMM_READY (3 << 8) + #define MACCR_PMM_D1 (4 << 8) + #define MACCR_PMM_D2 (5 << 8) + #define MACCR_PMM_WAKE (7 << 8) + #define MACCR_PMM_D1_WAKE (8 << 8) + #define MACCR_PMM_D2_WAKE (9 << 8) + #define MACCR_PMM_SLEEP (10 << 8) + #define MACCR_PMM_PHY_RESET (11 << 8) + #define MACCR_PMM_SOFT_D1 (16 << 8) + #define MACCR_PMM_SOFT_D2 (17 << 8) +#define P0_TFBFCR (0x10) + #define TFBFCR_SCHE_FREE_PAGE 0xE07F + #define TFBFCR_FREE_PAGE_BITS 0x07 + #define TFBFCR_FREE_PAGE_LATCH BIT(6) + #define TFBFCR_SET_FREE_PAGE(x) (((x) & 0x3F) << TFBFCR_FREE_PAGE_BITS) + #define TFBFCR_TX_PAGE_SET BIT(13) + #define TFBFCR_MANU_ENTX BIT(15) + #define TX_FREEBUF_MASK 0x003F + #define TX_DPTSTART 0x4000 + +#define P0_TSNR (0x12) + #define TXNR_TXB_ERR BIT(5) + #define TXNR_TXB_IDLE BIT(6) + #define TSNR_PKT_CNT(x) (((x) & 0x3F) << 8) + #define TXNR_TXB_REINIT BIT(14) + #define TSNR_TXB_START BIT(15) +#define P0_RTDPR (0x14) +#define P0_RXBCR1 (0x16) + #define RXBCR1_RXB_DISCARD BIT(14) + #define RXBCR1_RXB_START BIT(15) +#define P0_RXBCR2 (0x18) + #define RXBCR2_PKT_MASK (0xFF) + #define RXBCR2_RXPC_MASK (0x7F) + #define RXBCR2_RXB_READY BIT(13) + #define RXBCR2_RXB_IDLE BIT(14) + #define RXBCR2_RXB_REINIT BIT(15) +#define P0_RTWCR (0x1A) + #define RTWCR_RXWC_MASK (0x3FFF) + #define RTWCR_RX_LATCH BIT(15) +#define P0_RCPHR (0x1C) + + /* Definition of PAGE1 */ +#define P1_RPPER (0x22) + #define RPPER_RXEN BIT(0) +#define P1_MRCR (0x28) +#define P1_MDR (0x2A) +#define P1_RMPR (0x2C) +#define P1_TMPR (0x2E) +#define P1_RXBSPCR (0x30) + #define RXBSPCR_STUF_WORD_CNT(x) (((x) & 0x7000) >> 12) + #define RXBSPCR_STUF_ENABLE BIT(15) +#define P1_MCR (0x32) + #define MCR_SBP BIT(8) + #define MCR_SM BIT(9) + #define MCR_CRCENLAN BIT(11) + #define MCR_STP BIT(12) + /* Definition of PAGE2 */ +#define P2_CIR (0x42) +#define P2_PCR (0x44) + #define PCR_POLL_EN BIT(0) + #define PCR_POLL_FLOWCTRL BIT(1) + #define PCR_POLL_BMCR BIT(2) + #define PCR_PHYID(x) ((x) << 8) +#define P2_PHYSR (0x46) +#define P2_MDIODR (0x48) +#define P2_MDIOCR (0x4A) + #define MDIOCR_RADDR(x) ((x) & 0x1F) + #define MDIOCR_FADDR(x) (((x) & 0x1F) << 8) + #define MDIOCR_VALID BIT(13) + #define MDIOCR_READ BIT(14) + #define MDIOCR_WRITE BIT(15) +#define P2_LCR0 (0x4C) + #define LCR_LED0_EN BIT(0) + #define LCR_LED0_100MODE BIT(1) + #define LCR_LED0_DUPLEX BIT(2) + #define LCR_LED0_LINK BIT(3) + #define LCR_LED0_ACT BIT(4) + #define LCR_LED0_COL BIT(5) + #define LCR_LED0_10MODE BIT(6) + #define LCR_LED0_DUPCOL BIT(7) + #define LCR_LED1_EN BIT(8) + #define LCR_LED1_100MODE BIT(9) + #define LCR_LED1_DUPLEX BIT(10) + #define LCR_LED1_LINK BIT(11) + #define LCR_LED1_ACT BIT(12) + #define LCR_LED1_COL BIT(13) + #define LCR_LED1_10MODE BIT(14) + #define LCR_LED1_DUPCOL BIT(15) +#define P2_LCR1 (0x4E) + #define LCR_LED2_MASK (0xFF00) + #define LCR_LED2_EN BIT(0) + #define LCR_LED2_100MODE BIT(1) + #define LCR_LED2_DUPLEX BIT(2) + #define LCR_LED2_LINK BIT(3) + #define LCR_LED2_ACT BIT(4) + #define LCR_LED2_COL BIT(5) + #define LCR_LED2_10MODE BIT(6) + #define LCR_LED2_DUPCOL BIT(7) +#define P2_IPGCR (0x50) +#define P2_CRIR (0x52) +#define P2_FLHWCR (0x54) +#define P2_RXCR (0x56) + #define RXCR_PRO BIT(0) + #define RXCR_AMALL BIT(1) + #define RXCR_SEP BIT(2) + #define RXCR_AB BIT(3) + #define RXCR_AM BIT(4) + #define RXCR_AP BIT(5) + #define RXCR_ARP BIT(6) +#define P2_JLCR (0x58) +#define P2_MPLR (0x5C) + + /* Definition of PAGE3 */ +#define P3_MACASR0 (0x62) + #define P3_MACASR(x) (P3_MACASR0 + 2 * (x)) + #define MACASR_LOWBYTE_MASK 0x00FF + #define MACASR_HIGH_BITS 0x08 +#define P3_MACASR1 (0x64) +#define P3_MACASR2 (0x66) +#define P3_MFAR01 (0x68) +#define P3_MFAR_BASE (0x68) + #define P3_MFAR(x) (P3_MFAR_BASE + 2 * (x)) + +#define P3_MFAR23 (0x6A) +#define P3_MFAR45 (0x6C) +#define P3_MFAR67 (0x6E) +#define P3_VID0FR (0x70) +#define P3_VID1FR (0x72) +#define P3_EECSR (0x74) +#define P3_EEDR (0x76) +#define P3_EECR (0x78) + #define EECR_ADDR_MASK (0x00FF) + #define EECR_READ_ACT BIT(8) + #define EECR_WRITE_ACT BIT(9) + #define EECR_WRITE_DISABLE BIT(10) + #define EECR_WRITE_ENABLE BIT(11) + #define EECR_EE_READY BIT(13) + #define EECR_RELOAD BIT(14) + #define EECR_RESET BIT(15) +#define P3_TPCR (0x7A) + #define TPCR_PATT_MASK (0xFF) + #define TPCR_RAND_PKT_EN BIT(14) + #define TPCR_FIXED_PKT_EN BIT(15) +#define P3_TPLR (0x7C) + /* Definition of PAGE4 */ +#define P4_SPICR (0x8A) + #define SPICR_RCEN BIT(0) + #define SPICR_QCEN BIT(1) + #define SPICR_RBRE BIT(3) + #define SPICR_PMM BIT(4) + #define SPICR_LOOPBACK BIT(8) + #define SPICR_CORE_RES_CLR BIT(10) + #define SPICR_SPI_RES_CLR BIT(11) +#define P4_SPIISMR (0x8C) + +#define P4_COERCR0 (0x92) + #define COERCR0_RXIPCE BIT(0) + #define COERCR0_RXIPVE BIT(1) + #define COERCR0_RXV6PE BIT(2) + #define COERCR0_RXTCPE BIT(3) + #define COERCR0_RXUDPE BIT(4) + #define COERCR0_RXICMP BIT(5) + #define COERCR0_RXIGMP BIT(6) + #define COERCR0_RXICV6 BIT(7) + + #define COERCR0_RXTCPV6 BIT(8) + #define COERCR0_RXUDPV6 BIT(9) + #define COERCR0_RXICMV6 BIT(10) + #define COERCR0_RXIGMV6 BIT(11) + #define COERCR0_RXICV6V6 BIT(12) + + #define COERCR0_DEFAULT (COERCR0_RXIPCE | COERCR0_RXV6PE | \ + COERCR0_RXTCPE | COERCR0_RXUDPE | \ + COERCR0_RXTCPV6 | COERCR0_RXUDPV6) +#define P4_COERCR1 (0x94) + #define COERCR1_IPCEDP BIT(0) + #define COERCR1_IPVEDP BIT(1) + #define COERCR1_V6VEDP BIT(2) + #define COERCR1_TCPEDP BIT(3) + #define COERCR1_UDPEDP BIT(4) + #define COERCR1_ICMPDP BIT(5) + #define COERCR1_IGMPDP BIT(6) + #define COERCR1_ICV6DP BIT(7) + #define COERCR1_RX64TE BIT(8) + #define COERCR1_RXPPPE BIT(9) + #define COERCR1_TCP6DP BIT(10) + #define COERCR1_UDP6DP BIT(11) + #define COERCR1_IC6DP BIT(12) + #define COERCR1_IG6DP BIT(13) + #define COERCR1_ICV66DP BIT(14) + #define COERCR1_RPCE BIT(15) + + #define COERCR1_DEFAULT (COERCR1_RXPPPE) + +#define P4_COETCR0 (0x96) + #define COETCR0_TXIP BIT(0) + #define COETCR0_TXTCP BIT(1) + #define COETCR0_TXUDP BIT(2) + #define COETCR0_TXICMP BIT(3) + #define COETCR0_TXIGMP BIT(4) + #define COETCR0_TXICV6 BIT(5) + #define COETCR0_TXTCPV6 BIT(8) + #define COETCR0_TXUDPV6 BIT(9) + #define COETCR0_TXICMV6 BIT(10) + #define COETCR0_TXIGMV6 BIT(11) + #define COETCR0_TXICV6V6 BIT(12) + + #define COETCR0_DEFAULT (COETCR0_TXIP | COETCR0_TXTCP | \ + COETCR0_TXUDP | COETCR0_TXTCPV6 | \ + COETCR0_TXUDPV6) +#define P4_COETCR1 (0x98) + #define COETCR1_TX64TE BIT(0) + #define COETCR1_TXPPPE BIT(1) + +#define P4_COECEDR (0x9A) +#define P4_L2CECR (0x9C) + + /* Definition of PAGE5 */ +#define P5_WFTR (0xA2) + #define WFTR_2MS (0x01) + #define WFTR_4MS (0x02) + #define WFTR_8MS (0x03) + #define WFTR_16MS (0x04) + #define WFTR_32MS (0x05) + #define WFTR_64MS (0x06) + #define WFTR_128MS (0x07) + #define WFTR_256MS (0x08) + #define WFTR_512MS (0x09) + #define WFTR_1024MS (0x0A) + #define WFTR_2048MS (0x0B) + #define WFTR_4096MS (0x0C) + #define WFTR_8192MS (0x0D) + #define WFTR_16384MS (0x0E) + #define WFTR_32768MS (0x0F) +#define P5_WFCCR (0xA4) +#define P5_WFCR03 (0xA6) + #define WFCR03_F0_EN BIT(0) + #define WFCR03_F1_EN BIT(4) + #define WFCR03_F2_EN BIT(8) + #define WFCR03_F3_EN BIT(12) +#define P5_WFCR47 (0xA8) + #define WFCR47_F4_EN BIT(0) + #define WFCR47_F5_EN BIT(4) + #define WFCR47_F6_EN BIT(8) + #define WFCR47_F7_EN BIT(12) +#define P5_WF0BMR0 (0xAA) +#define P5_WF0BMR1 (0xAC) +#define P5_WF0CR (0xAE) +#define P5_WF0OBR (0xB0) +#define P5_WF1BMR0 (0xB2) +#define P5_WF1BMR1 (0xB4) +#define P5_WF1CR (0xB6) +#define P5_WF1OBR (0xB8) +#define P5_WF2BMR0 (0xBA) +#define P5_WF2BMR1 (0xBC) + + /* Definition of PAGE6 */ +#define P6_WF2CR (0xC2) +#define P6_WF2OBR (0xC4) +#define P6_WF3BMR0 (0xC6) +#define P6_WF3BMR1 (0xC8) +#define P6_WF3CR (0xCA) +#define P6_WF3OBR (0xCC) +#define P6_WF4BMR0 (0xCE) +#define P6_WF4BMR1 (0xD0) +#define P6_WF4CR (0xD2) +#define P6_WF4OBR (0xD4) +#define P6_WF5BMR0 (0xD6) +#define P6_WF5BMR1 (0xD8) +#define P6_WF5CR (0xDA) +#define P6_WF5OBR (0xDC) + +/* Definition of PAGE7 */ +#define P7_WF6BMR0 (0xE2) +#define P7_WF6BMR1 (0xE4) +#define P7_WF6CR (0xE6) +#define P7_WF6OBR (0xE8) +#define P7_WF7BMR0 (0xEA) +#define P7_WF7BMR1 (0xEC) +#define P7_WF7CR (0xEE) +#define P7_WF7OBR (0xF0) +#define P7_WFR01 (0xF2) +#define P7_WFR23 (0xF4) +#define P7_WFR45 (0xF6) +#define P7_WFR67 (0xF8) +#define P7_WFPC0 (0xFA) +#define P7_WFPC1 (0xFC) + +/* Tx headers structure */ +struct tx_sop_header { + /* bit 15-11: flags, bit 10-0: packet length */ + u16 flags_len; + /* bit 15-11: sequence number, bit 11-0: packet length bar */ + u16 seq_lenbar; +}; + +struct tx_segment_header { + /* bit 15-14: flags, bit 13-11: segment number */ + /* bit 10-0: segment length */ + u16 flags_seqnum_seglen; + /* bit 15-14: end offset, bit 13-11: start offset */ + /* bit 10-0: segment length bar */ + u16 eo_so_seglenbar; +}; + +struct tx_eop_header { + /* bit 15-11: sequence number, bit 10-0: packet length */ + u16 seq_len; + /* bit 15-11: sequence number bar, bit 10-0: packet length bar */ + u16 seqbar_lenbar; +}; + +struct tx_pkt_info { + struct tx_sop_header sop; + struct tx_segment_header seg; + struct tx_eop_header eop; + u16 pkt_len; + u16 seq_num; +}; + +/* Rx headers structure */ +struct rx_header { + u16 flags_len; + u16 seq_lenbar; + u16 flags; +}; + +extern unsigned long ax88796c_no_regs_mask[]; + +#endif /* #ifndef _AX88796C_MAIN_H */ diff --git a/drivers/net/ethernet/asix/ax88796c_spi.c b/drivers/net/ethernet/asix/ax88796c_spi.c new file mode 100644 index 000000000000..94df4f96d2be --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_spi.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#define pr_fmt(fmt) "ax88796c: " fmt + +#include +#include + +#include "ax88796c_spi.h" + +const u8 ax88796c_rx_cmd_buf[5] = {AX_SPICMD_READ_RXQ, 0xFF, 0xFF, 0xFF, 0xFF}; +const u8 ax88796c_tx_cmd_buf[4] = {AX_SPICMD_WRITE_TXQ, 0xFF, 0xFF, 0xFF}; + +/* driver bus management functions */ +int axspi_wakeup(struct axspi_data *ax_spi) +{ + int ret; + + ax_spi->cmd_buf[0] = AX_SPICMD_EXIT_PWD; /* OP */ + ret = spi_write(ax_spi->spi, ax_spi->cmd_buf, 1); + if (ret) + dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret); + return ret; +} + +int axspi_read_status(struct axspi_data *ax_spi, struct spi_status *status) +{ + int ret; + + /* OP */ + ax_spi->cmd_buf[0] = AX_SPICMD_READ_STATUS; + ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3); + if (ret) + dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret); + else + le16_to_cpus(&status->isr); + + return ret; +} + +int axspi_read_rxq(struct axspi_data *ax_spi, void *data, int len) +{ + struct spi_transfer *xfer = ax_spi->spi_rx_xfer; + int ret; + + memcpy(ax_spi->cmd_buf, ax88796c_rx_cmd_buf, 5); + + xfer->tx_buf = ax_spi->cmd_buf; + xfer->rx_buf = NULL; + xfer->len = ax_spi->comp ? 2 : 5; + xfer->bits_per_word = 8; + spi_message_add_tail(xfer, &ax_spi->rx_msg); + + xfer++; + xfer->rx_buf = data; + xfer->tx_buf = NULL; + xfer->len = len; + xfer->bits_per_word = 8; + spi_message_add_tail(xfer, &ax_spi->rx_msg); + ret = spi_sync(ax_spi->spi, &ax_spi->rx_msg); + if (ret) + dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret); + + return ret; +} + +int axspi_write_txq(const struct axspi_data *ax_spi, void *data, int len) +{ + return spi_write(ax_spi->spi, data, len); +} + +u16 axspi_read_reg(struct axspi_data *ax_spi, u8 reg) +{ + int ret; + int len = ax_spi->comp ? 3 : 4; + + ax_spi->cmd_buf[0] = 0x03; /* OP code read register */ + ax_spi->cmd_buf[1] = reg; /* register address */ + ax_spi->cmd_buf[2] = 0xFF; /* dumy cycle */ + ax_spi->cmd_buf[3] = 0xFF; /* dumy cycle */ + ret = spi_write_then_read(ax_spi->spi, + ax_spi->cmd_buf, len, + ax_spi->rx_buf, 2); + if (ret) { + dev_err(&ax_spi->spi->dev, + "%s() failed: ret = %d\n", __func__, ret); + return 0xFFFF; + } + + le16_to_cpus((u16 *)ax_spi->rx_buf); + + return *(u16 *)ax_spi->rx_buf; +} + +int axspi_write_reg(struct axspi_data *ax_spi, u8 reg, u16 value) +{ + int ret; + + memset(ax_spi->cmd_buf, 0, sizeof(ax_spi->cmd_buf)); + ax_spi->cmd_buf[0] = AX_SPICMD_WRITE_REG; /* OP code read register */ + ax_spi->cmd_buf[1] = reg; /* register address */ + ax_spi->cmd_buf[2] = value; + ax_spi->cmd_buf[3] = value >> 8; + + ret = spi_write(ax_spi->spi, ax_spi->cmd_buf, 4); + if (ret) + dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret); + return ret; +} + diff --git a/drivers/net/ethernet/asix/ax88796c_spi.h b/drivers/net/ethernet/asix/ax88796c_spi.h new file mode 100644 index 000000000000..5bcf91f603fb --- /dev/null +++ b/drivers/net/ethernet/asix/ax88796c_spi.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010 ASIX Electronics Corporation + * Copyright (c) 2020 Samsung Electronics Co., Ltd. + * + * ASIX AX88796C SPI Fast Ethernet Linux driver + */ + +#ifndef _AX88796C_SPI_H +#define _AX88796C_SPI_H + +#include +#include + +/* Definition of SPI command */ +#define AX_SPICMD_WRITE_TXQ 0x02 +#define AX_SPICMD_READ_REG 0x03 +#define AX_SPICMD_READ_STATUS 0x05 +#define AX_SPICMD_READ_RXQ 0x0B +#define AX_SPICMD_BIDIR_WRQ 0xB2 +#define AX_SPICMD_WRITE_REG 0xD8 +#define AX_SPICMD_EXIT_PWD 0xAB + +extern const u8 ax88796c_rx_cmd_buf[]; +extern const u8 ax88796c_tx_cmd_buf[]; + +struct axspi_data { + struct spi_device *spi; + struct spi_message rx_msg; + struct spi_transfer spi_rx_xfer[2]; + u8 cmd_buf[6]; + u8 rx_buf[6]; + u8 comp; +}; + +struct spi_status { + u16 isr; + u8 status; +# define AX_STATUS_READY 0x80 +}; + +int axspi_read_rxq(struct axspi_data *ax_spi, void *data, int len); +int axspi_write_txq(const struct axspi_data *ax_spi, void *data, int len); +u16 axspi_read_reg(struct axspi_data *ax_spi, u8 reg); +int axspi_write_reg(struct axspi_data *ax_spi, u8 reg, u16 value); +int axspi_read_status(struct axspi_data *ax_spi, struct spi_status *status); +int axspi_wakeup(struct axspi_data *ax_spi); + +static inline u16 AX_READ(struct axspi_data *ax_spi, u8 offset) +{ + return axspi_read_reg(ax_spi, offset); +} + +static inline int AX_WRITE(struct axspi_data *ax_spi, u16 value, u8 offset) +{ + return axspi_write_reg(ax_spi, offset, value); +} + +static inline int AX_READ_STATUS(struct axspi_data *ax_spi, + struct spi_status *status) +{ + return axspi_read_status(ax_spi, status); +} + +static inline int AX_WAKEUP(struct axspi_data *ax_spi) +{ + return axspi_wakeup(ax_spi); +} +#endif From b89e7f2c31ae5fe730567085da23574cb8f319ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 22 Oct 2021 14:24:44 +0100 Subject: [PATCH 427/440] ice: Nuild fix. M --- drivers/net/ethernet/intel/ice/ice_devlink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 1fb754b1355b..b9bd9f9472f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -62,8 +62,6 @@ static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, hw->api_min_ver, hw->api_patch); - - return 0; } static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) From 016c89460d348c405313019bffeb73971e35ed09 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Oct 2021 08:51:45 -0700 Subject: [PATCH 428/440] mlx5: fix build after merge Silent merge conflict between these two: 3d677735d3b7 ("net/mlx5: Lag, move lag files into directory") 14fe2471c628 ("net/mlx5: Lag, change multipath and bonding to be mutually exclusive") Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index b7461c17d601..d7e613d0139a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -10,8 +10,8 @@ #include "en_tc.h" #include "rep/tc.h" #include "rep/neigh.h" -#include "lag.h" -#include "lag_mp.h" +#include "lag/lag.h" +#include "lag/mp.h" struct mlx5e_tc_tun_route_attr { struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 57369925a788..3af3da214a5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -68,8 +68,8 @@ #include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" #include -#include "lag.h" -#include "lag_mp.h" +#include "lag/lag.h" +#include "lag/mp.h" #define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) From 93772114413ee7ba33b9abda54dd857e6e5d9a6e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:03 -0700 Subject: [PATCH 429/440] net: xen: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netfront.c | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index c58996c1e230..fe8e21ad8ed9 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -494,6 +494,9 @@ static const struct net_device_ops xenvif_netdev_ops = { struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, unsigned int handle) { + static const u8 dummy_addr[ETH_ALEN] = { + 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, + }; int err; struct net_device *dev; struct xenvif *vif; @@ -551,8 +554,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, * stolen by an Ethernet bridge for STP purposes. * (FE:FF:FF:FF:FF:FF) */ - eth_broadcast_addr(dev->dev_addr); - dev->dev_addr[0] &= ~0x01; + eth_hw_addr_set(dev, dummy_addr); netif_carrier_off(dev); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e31b98403f31..57437e4b8a94 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2157,6 +2157,7 @@ static int talk_to_netback(struct xenbus_device *dev, unsigned int max_queues = 0; struct netfront_queue *queue = NULL; unsigned int num_queues = 1; + u8 addr[ETH_ALEN]; info->netdev->irq = 0; @@ -2170,11 +2171,12 @@ static int talk_to_netback(struct xenbus_device *dev, "feature-split-event-channels", 0); /* Read mac addr. */ - err = xen_net_read_mac(dev, info->netdev->dev_addr); + err = xen_net_read_mac(dev, addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); goto out_unlocked; } + eth_hw_addr_set(info->netdev, addr); info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend, "feature-xdp-headroom", 0); From a7021af707a3637c963ce41802b650db6793eb8a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:04 -0700 Subject: [PATCH 430/440] usb: smsc: use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc75xx.c | 6 ++++-- drivers/net/usb/smsc95xx.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 3b6987bb4fbe..95de452ff4da 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -757,6 +757,8 @@ static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc75xx_init_mac_address(struct usbnet *dev) { + u8 addr[ETH_ALEN]; + /* maybe the boot loader passed the MAC address in devicetree */ if (!platform_get_ethdev_address(&dev->udev->dev, dev->net)) { if (is_valid_ether_addr(dev->net->dev_addr)) { @@ -767,8 +769,8 @@ static void smsc75xx_init_mac_address(struct usbnet *dev) } /* try reading mac address from EEPROM */ - if (smsc75xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - dev->net->dev_addr) == 0) { + if (smsc75xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, addr) == 0) { + eth_hw_addr_set(dev->net, addr); if (is_valid_ether_addr(dev->net->dev_addr)) { /* eeprom values are valid so use them */ netif_dbg(dev, ifup, dev->net, diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 21a42a6527dc..20fe4cd8f784 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -755,6 +755,8 @@ static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc95xx_init_mac_address(struct usbnet *dev) { + u8 addr[ETH_ALEN]; + /* maybe the boot loader passed the MAC address in devicetree */ if (!platform_get_ethdev_address(&dev->udev->dev, dev->net)) { if (is_valid_ether_addr(dev->net->dev_addr)) { @@ -765,8 +767,8 @@ static void smsc95xx_init_mac_address(struct usbnet *dev) } /* try reading mac address from EEPROM */ - if (smsc95xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - dev->net->dev_addr) == 0) { + if (smsc95xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, addr) == 0) { + eth_hw_addr_set(dev->net, addr); if (is_valid_ether_addr(dev->net->dev_addr)) { /* eeprom values are valid so use them */ netif_dbg(dev, ifup, dev->net, "MAC address read from EEPROM\n"); From 18867486fea3f9439c4c7e66e92146bd6bf3de85 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:05 -0700 Subject: [PATCH 431/440] net: qmi_wwan: use dev_addr_mod() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 33ada2c59952..86b814e99224 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -835,8 +835,11 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) /* make MAC addr easily distinguishable from an IP header */ if (possibly_iphdr(dev->net->dev_addr)) { - dev->net->dev_addr[0] |= 0x02; /* set local assignment bit */ - dev->net->dev_addr[0] &= 0xbf; /* clear "IP" bit */ + u8 addr = dev->net->dev_addr[0]; + + addr |= 0x02; /* set local assignment bit */ + addr &= 0xbf; /* clear "IP" bit */ + dev_addr_mod(dev->net, 0, &addr, 1); } dev->net->netdev_ops = &qmi_wwan_netdev_ops; dev->net->sysfs_groups[0] = &qmi_wwan_sysfs_attr_group; From 2674e7ea22ba0e22a2d1603bd51e0b8f6442a267 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:06 -0700 Subject: [PATCH 432/440] net: usb: don't write directly to netdev->dev_addr Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Manually fix all net/usb drivers without separate maintainers. v2: catc does DMA to the buffer, leave the conversion to Oliver Signed-off-by: Jakub Kicinski --- drivers/net/usb/ch9200.c | 4 +++- drivers/net/usb/cx82310_eth.c | 5 +++-- drivers/net/usb/kaweth.c | 3 +-- drivers/net/usb/mcs7830.c | 4 +++- drivers/net/usb/sierra_net.c | 6 ++++-- drivers/net/usb/sr9700.c | 4 +++- drivers/net/usb/sr9800.c | 5 +++-- drivers/net/usb/usbnet.c | 6 ++++-- 8 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c index d7f3b70d5477..f69d9b902da0 100644 --- a/drivers/net/usb/ch9200.c +++ b/drivers/net/usb/ch9200.c @@ -336,6 +336,7 @@ static int ch9200_bind(struct usbnet *dev, struct usb_interface *intf) { int retval = 0; unsigned char data[2]; + u8 addr[ETH_ALEN]; retval = usbnet_get_endpoints(dev, intf); if (retval) @@ -383,7 +384,8 @@ static int ch9200_bind(struct usbnet *dev, struct usb_interface *intf) retval = control_write(dev, REQUEST_WRITE, 0, MAC_REG_CTRL, data, 0x02, CONTROL_TIMEOUT_MS); - retval = get_mac_address(dev, dev->net->dev_addr); + retval = get_mac_address(dev, addr); + eth_hw_addr_set(dev->net, addr); return retval; } diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index c4568a491dc4..79a47e2fd437 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -146,6 +146,7 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) u8 link[3]; int timeout = 50; struct cx82310_priv *priv; + u8 addr[ETH_ALEN]; /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */ if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0 @@ -202,12 +203,12 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) goto err; /* get the MAC address */ - ret = cx82310_cmd(dev, CMD_GET_MAC_ADDR, true, NULL, 0, - dev->net->dev_addr, ETH_ALEN); + ret = cx82310_cmd(dev, CMD_GET_MAC_ADDR, true, NULL, 0, addr, ETH_ALEN); if (ret) { netdev_err(dev->net, "unable to read MAC address: %d\n", ret); goto err; } + eth_hw_addr_set(dev->net, addr); /* start (does not seem to have any effect?) */ ret = cx82310_cmd(dev, CMD_START, false, NULL, 0, NULL, 0); diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 144c686b4333..9b2bc1993ece 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1044,8 +1044,7 @@ err_fw: goto err_all_but_rxbuf; memcpy(netdev->broadcast, &bcast_addr, sizeof(bcast_addr)); - memcpy(netdev->dev_addr, &kaweth->configuration.hw_addr, - sizeof(kaweth->configuration.hw_addr)); + eth_hw_addr_set(netdev, (u8 *)&kaweth->configuration.hw_addr); netdev->netdev_ops = &kaweth_netdev_ops; netdev->watchdog_timeo = KAWETH_TX_TIMEOUT; diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 5f42db26d200..326cc4e749d8 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -473,17 +473,19 @@ static const struct net_device_ops mcs7830_netdev_ops = { static int mcs7830_bind(struct usbnet *dev, struct usb_interface *udev) { struct net_device *net = dev->net; + u8 addr[ETH_ALEN]; int ret; int retry; /* Initial startup: Gather MAC address setting from EEPROM */ ret = -EINVAL; for (retry = 0; retry < 5 && ret; retry++) - ret = mcs7830_hif_get_mac_address(dev, net->dev_addr); + ret = mcs7830_hif_get_mac_address(dev, addr); if (ret) { dev_warn(&dev->udev->dev, "Cannot read MAC address\n"); goto out; } + eth_hw_addr_set(net, addr); mcs7830_data_set_multicast(net); diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 55025202dc4f..bb4cbe8fc846 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -669,6 +669,7 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) 0x00, 0x00, SIERRA_NET_HIP_MSYNC_ID, 0x00}; static const u8 shdwn_tmplate[sizeof(priv->shdwn_msg)] = { 0x00, 0x00, SIERRA_NET_HIP_SHUTD_ID, 0x00}; + u8 mod[2]; dev_dbg(&dev->udev->dev, "%s", __func__); @@ -698,8 +699,9 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->netdev_ops = &sierra_net_device_ops; /* change MAC addr to include, ifacenum, and to be unique */ - dev->net->dev_addr[ETH_ALEN-2] = atomic_inc_return(&iface_counter); - dev->net->dev_addr[ETH_ALEN-1] = ifacenum; + mod[0] = atomic_inc_return(&iface_counter); + mod[1] = ifacenum; + dev_addr_mod(dev->net, ETH_ALEN - 2, mod, 2); /* prepare shutdown message template */ memcpy(priv->shdwn_msg, shdwn_tmplate, sizeof(priv->shdwn_msg)); diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 15209de1849e..b658510cc9a4 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -320,6 +320,7 @@ static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf) { struct net_device *netdev; struct mii_if_info *mii; + u8 addr[ETH_ALEN]; int ret; ret = usbnet_get_endpoints(dev, intf); @@ -350,11 +351,12 @@ static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf) * EEPROM automatically to PAR. In case there is no EEPROM externally, * a default MAC address is stored in PAR for making chip work properly. */ - if (sr_read(dev, SR_PAR, ETH_ALEN, netdev->dev_addr) < 0) { + if (sr_read(dev, SR_PAR, ETH_ALEN, addr) < 0) { netdev_err(netdev, "Error reading MAC address\n"); ret = -ENODEV; goto out; } + eth_hw_addr_set(netdev, addr); /* power up and reset phy */ sr_write_reg(dev, SR_PRR, PRR_PHY_RST); diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 838f4e9e8b58..f5e19f3ef6cd 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -731,6 +731,7 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) struct sr_data *data = (struct sr_data *)&dev->data; u16 led01_mux, led23_mux; int ret, embd_phy; + u8 addr[ETH_ALEN]; u32 phyid; u16 rx_ctl; @@ -754,12 +755,12 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) } /* Get the MAC address */ - ret = sr_read_cmd(dev, SR_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, - dev->net->dev_addr); + ret = sr_read_cmd(dev, SR_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, addr); if (ret < 0) { netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret); return ret; } + eth_hw_addr_set(dev->net, addr); netdev_dbg(dev->net, "mac addr : %pM\n", dev->net->dev_addr); /* Initialize MII structure */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 80432ee0ce69..350bae673ed4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -165,12 +165,13 @@ EXPORT_SYMBOL_GPL(usbnet_get_endpoints); int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) { + u8 addr[ETH_ALEN]; int tmp = -1, ret; unsigned char buf [13]; ret = usb_string(dev->udev, iMACAddress, buf, sizeof buf); if (ret == 12) - tmp = hex2bin(dev->net->dev_addr, buf, 6); + tmp = hex2bin(addr, buf, 6); if (tmp < 0) { dev_dbg(&dev->udev->dev, "bad MAC string %d fetch, %d\n", iMACAddress, tmp); @@ -178,6 +179,7 @@ int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) ret = -EINVAL; return ret; } + eth_hw_addr_set(dev->net, addr); return 0; } EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr); @@ -1726,7 +1728,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->net = net; strscpy(net->name, "usb%d", sizeof(net->name)); - memcpy (net->dev_addr, node_id, sizeof node_id); + eth_hw_addr_set(net, node_id); /* rx and tx sides can use different message sizes; * bind() should set rx_urb_size in that case. From 1e9258c389ee58b34238abaa600c10270b081af8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:07 -0700 Subject: [PATCH 433/440] fddi: defxx,defza: use dev_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Acked-by: Maciej W. Rozycki Signed-off-by: Jakub Kicinski --- drivers/net/fddi/defxx.c | 6 +++--- drivers/net/fddi/defza.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 5810e8473789..b584ffe38ad6 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -1117,7 +1117,7 @@ static int dfx_driver_init(struct net_device *dev, const char *print_name, * dfx_ctl_set_mac_address. */ - memcpy(dev->dev_addr, bp->factory_mac_addr, FDDI_K_ALEN); + dev_addr_set(dev, bp->factory_mac_addr); if (dfx_bus_tc) board_name = "DEFTA"; if (dfx_bus_eisa) @@ -1474,7 +1474,7 @@ static int dfx_open(struct net_device *dev) * address. */ - memcpy(dev->dev_addr, bp->factory_mac_addr, FDDI_K_ALEN); + dev_addr_set(dev, bp->factory_mac_addr); /* Clear local unicast/multicast address tables and counts */ @@ -2379,7 +2379,7 @@ static int dfx_ctl_set_mac_address(struct net_device *dev, void *addr) /* Copy unicast address to driver-maintained structs and update count */ - memcpy(dev->dev_addr, p_sockaddr->sa_data, FDDI_K_ALEN); /* update device struct */ + dev_addr_set(dev, p_sockaddr->sa_data); /* update device struct */ memcpy(&bp->uc_table[0], p_sockaddr->sa_data, FDDI_K_ALEN); /* update driver struct */ bp->uc_count = 1; diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 0de2c4552f5e..3a6b08eb5e1b 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -1380,7 +1380,7 @@ static int fza_probe(struct device *bdev) goto err_out_irq; fza_reads(&init->hw_addr, &hw_addr, sizeof(hw_addr)); - memcpy(dev->dev_addr, &hw_addr, FDDI_K_ALEN); + dev_addr_set(dev, &hw_addr); fza_reads(&init->rom_rev, &rom_rev, sizeof(rom_rev)); fza_reads(&init->fw_rev, &fw_rev, sizeof(fw_rev)); From 2e0566aeb9ff83db9fb22bf6f0b994f03377b038 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:08 -0700 Subject: [PATCH 434/440] fddi: skfp: constify and use dev_addr_set() Get it ready for constant netdev->dev_addr. Signed-off-by: Jakub Kicinski --- drivers/net/fddi/skfp/h/smc.h | 2 +- drivers/net/fddi/skfp/skfddi.c | 2 +- drivers/net/fddi/skfp/smtinit.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/fddi/skfp/h/smc.h b/drivers/net/fddi/skfp/h/smc.h index 3814a2ff64ae..b0e6ce0d893e 100644 --- a/drivers/net/fddi/skfp/h/smc.h +++ b/drivers/net/fddi/skfp/h/smc.h @@ -470,7 +470,7 @@ void card_stop(struct s_smc *smc); void init_board(struct s_smc *smc, u_char *mac_addr); int init_fplus(struct s_smc *smc); void init_plc(struct s_smc *smc); -int init_smt(struct s_smc *smc, u_char *mac_addr); +int init_smt(struct s_smc *smc, const u_char *mac_addr); void mac1_irq(struct s_smc *smc, u_short stu, u_short stl); void mac2_irq(struct s_smc *smc, u_short code_s2u, u_short code_s2l); void mac3_irq(struct s_smc *smc, u_short code_s3u, u_short code_s3l); diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 652cb174302e..2b6a607ac0b7 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -925,7 +925,7 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr) unsigned long Flags; - memcpy(dev->dev_addr, p_sockaddr->sa_data, FDDI_K_ALEN); + dev_addr_set(dev, p_sockaddr->sa_data); spin_lock_irqsave(&bp->DriverLock, Flags); ResetAdapter(smc); spin_unlock_irqrestore(&bp->DriverLock, Flags); diff --git a/drivers/net/fddi/skfp/smtinit.c b/drivers/net/fddi/skfp/smtinit.c index c9898c83fe30..8b172c195685 100644 --- a/drivers/net/fddi/skfp/smtinit.c +++ b/drivers/net/fddi/skfp/smtinit.c @@ -19,7 +19,7 @@ #include "h/fddi.h" #include "h/smc.h" -void init_fddi_driver(struct s_smc *smc, u_char *mac_addr); +void init_fddi_driver(struct s_smc *smc, const u_char *mac_addr); /* define global debug variable */ #if defined(DEBUG) && !defined(DEBUG_BRD) @@ -57,7 +57,7 @@ static void set_oem_spec_val(struct s_smc *smc) /* * Init SMT */ -int init_smt(struct s_smc *smc, u_char *mac_addr) +int init_smt(struct s_smc *smc, const u_char *mac_addr) /* u_char *mac_addr; canonical address or NULL */ { int p ; From ed088907563da490d227a734076d705acc0d236c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:09 -0700 Subject: [PATCH 435/440] net: fjes: constify and use eth_hw_addr_set() Get it ready for constant netdev->dev_addr. Signed-off-by: Jakub Kicinski --- drivers/net/fjes/fjes_hw.c | 3 ++- drivers/net/fjes/fjes_hw.h | 2 +- drivers/net/fjes/fjes_main.c | 14 ++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 065bb0a40b1d..704e949484d0 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -137,7 +137,8 @@ static void fjes_hw_free_epbuf(struct epbuf_handler *epbh) epbh->ring = NULL; } -void fjes_hw_setup_epbuf(struct epbuf_handler *epbh, u8 *mac_addr, u32 mtu) +void fjes_hw_setup_epbuf(struct epbuf_handler *epbh, const u8 *mac_addr, + u32 mtu) { union ep_buffer_info *info = epbh->info; u16 vlan_id[EP_BUFFER_SUPPORT_VLAN_MAX]; diff --git a/drivers/net/fjes/fjes_hw.h b/drivers/net/fjes/fjes_hw.h index b4608ea2a2d5..997c7b37a402 100644 --- a/drivers/net/fjes/fjes_hw.h +++ b/drivers/net/fjes/fjes_hw.h @@ -330,7 +330,7 @@ int fjes_hw_register_buff_addr(struct fjes_hw *, int, int fjes_hw_unregister_buff_addr(struct fjes_hw *, int); void fjes_hw_init_command_registers(struct fjes_hw *, struct fjes_device_command_param *); -void fjes_hw_setup_epbuf(struct epbuf_handler *, u8 *, u32); +void fjes_hw_setup_epbuf(struct epbuf_handler *, const u8 *, u32); int fjes_hw_raise_interrupt(struct fjes_hw *, int, enum REG_ICTL_MASK); void fjes_hw_set_irqmask(struct fjes_hw *, enum REG_ICTL_MASK, bool); u32 fjes_hw_capture_interrupt_status(struct fjes_hw *); diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 185c8a398681..b06c17ac8d4e 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1203,6 +1203,7 @@ static int fjes_probe(struct platform_device *plat_dev) struct net_device *netdev; struct resource *res; struct fjes_hw *hw; + u8 addr[ETH_ALEN]; int err; err = -ENOMEM; @@ -1266,12 +1267,13 @@ static int fjes_probe(struct platform_device *plat_dev) goto err_free_control_wq; /* setup MAC address (02:00:00:00:00:[epid])*/ - netdev->dev_addr[0] = 2; - netdev->dev_addr[1] = 0; - netdev->dev_addr[2] = 0; - netdev->dev_addr[3] = 0; - netdev->dev_addr[4] = 0; - netdev->dev_addr[5] = hw->my_epid; /* EPID */ + addr[0] = 2; + addr[1] = 0; + addr[2] = 0; + addr[3] = 0; + addr[4] = 0; + addr[5] = hw->my_epid; /* EPID */ + eth_hw_addr_set(netdev, addr); err = register_netdev(netdev); if (err) From 5ed5b1912a81b9084fad2ef2b126990d18c83827 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:10 -0700 Subject: [PATCH 436/440] net: hippi: use dev_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski --- drivers/net/hippi/rrunner.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 7661dbb31162..16105292b140 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -502,6 +502,7 @@ static unsigned int write_eeprom(struct rr_private *rrpriv, static int rr_init(struct net_device *dev) { + u8 addr[HIPPI_ALEN] __aligned(4); struct rr_private *rrpriv; struct rr_regs __iomem *regs; u32 sram_size, rev; @@ -537,10 +538,11 @@ static int rr_init(struct net_device *dev) * other method I've seen. -VAL */ - *(__be16 *)(dev->dev_addr) = + *(__be16 *)(addr) = htons(rr_read_eeprom_word(rrpriv, offsetof(struct eeprom, manf.BoardULA))); - *(__be32 *)(dev->dev_addr+2) = + *(__be32 *)(addr+2) = htonl(rr_read_eeprom_word(rrpriv, offsetof(struct eeprom, manf.BoardULA[4]))); + dev_addr_set(dev, addr); printk(" MAC: %pM\n", dev->dev_addr); From 978bb0ae8b83097b2889f19907d2b519528ef235 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:11 -0700 Subject: [PATCH 437/440] net: s390: constify and use eth_hw_addr_set() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Make sure local references to netdev->dev_addr are constant. Acked-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/lcs.c | 2 +- drivers/s390/net/qeth_core_main.c | 4 ++-- drivers/s390/net/qeth_l2_main.c | 6 +++--- drivers/s390/net/qeth_l3_main.c | 3 +-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index c18fd48e02b6..2a6479740600 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -2162,7 +2162,7 @@ lcs_new_device(struct ccwgroup_device *ccwgdev) card->dev->ml_priv = card; card->dev->netdev_ops = &lcs_netdev_ops; card->dev->dev_port = card->portno; - memcpy(card->dev->dev_addr, card->mac, LCS_MAC_LENGTH); + eth_hw_addr_set(card->dev, card->mac); #ifdef CONFIG_IP_MULTICAST if (!lcs_check_multicast_support(card)) card->dev->netdev_ops = &lcs_mc_netdev_ops; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e9807d2996a9..15999a816054 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4375,7 +4375,7 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, !(adp_cmd->hdr.flags & QETH_SETADP_FLAGS_VIRTUAL_MAC)) return -EADDRNOTAVAIL; - ether_addr_copy(card->dev->dev_addr, adp_cmd->data.change_addr.addr); + eth_hw_addr_set(card->dev, adp_cmd->data.change_addr.addr); return 0; } @@ -5046,7 +5046,7 @@ int qeth_vm_request_mac(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "badmac"); QETH_CARD_HEX(card, 2, response->mac, ETH_ALEN); } else { - ether_addr_copy(card->dev->dev_addr, response->mac); + eth_hw_addr_set(card->dev, response->mac); } out: diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index dc6c00768d91..5b6187f2d9d6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -71,7 +71,7 @@ static int qeth_l2_send_setdelmac_cb(struct qeth_card *card, return qeth_l2_setdelmac_makerc(card, cmd->hdr.return_code); } -static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, +static int qeth_l2_send_setdelmac(struct qeth_card *card, const __u8 *mac, enum qeth_ipa_cmds ipacmd) { struct qeth_ipa_cmd *cmd; @@ -88,7 +88,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelmac_cb, NULL); } -static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) +static int qeth_l2_send_setmac(struct qeth_card *card, const __u8 *mac) { int rc; @@ -377,7 +377,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) if (rc) return rc; ether_addr_copy(old_addr, dev->dev_addr); - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); if (card->info.dev_addr_is_registered) qeth_l2_remove_mac(card, old_addr); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6fd3e288f059..e6e921310211 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -913,8 +913,7 @@ static int qeth_l3_iqd_read_initial_mac_cb(struct qeth_card *card, if (!is_valid_ether_addr(cmd->data.create_destroy_addr.mac_addr)) return -EADDRNOTAVAIL; - ether_addr_copy(card->dev->dev_addr, - cmd->data.create_destroy_addr.mac_addr); + eth_hw_addr_set(card->dev, cmd->data.create_destroy_addr.mac_addr); return 0; } From 7996acffd7cc83504eb300e791c80ac9c7f7e893 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:12 -0700 Subject: [PATCH 438/440] net: plip: use eth_hw_addr_set() Get it ready for constant netdev->dev_addr. Signed-off-by: Jakub Kicinski --- drivers/net/plip/plip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 82d609401711..0d491b4d6667 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -284,12 +284,16 @@ static const struct net_device_ops plip_netdev_ops = { static void plip_init_netdev(struct net_device *dev) { + static const u8 addr_init[ETH_ALEN] = { + 0xfc, 0xfc, 0xfc, + 0xfc, 0xfc, 0xfc, + }; struct net_local *nl = netdev_priv(dev); /* Then, override parts of it */ dev->tx_queue_len = 10; dev->flags = IFF_POINTOPOINT|IFF_NOARP; - memset(dev->dev_addr, 0xfc, ETH_ALEN); + eth_hw_addr_set(dev, addr_init); dev->netdev_ops = &plip_netdev_ops; dev->header_ops = &plip_header_ops; @@ -1109,7 +1113,7 @@ plip_open(struct net_device *dev) plip_init_dev(). */ const struct in_ifaddr *ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) { - memcpy(dev->dev_addr+2, &ifa->ifa_local, 4); + dev_addr_mod(dev, 2, &ifa->ifa_local, 4); } } From 5f07da89bcd0759ac8391b917245a07fbdcf9234 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:13 -0700 Subject: [PATCH 439/440] net: sb1000,rionet: use eth_hw_addr_set() Get these two oldies ready for constant netdev->dev_addr. Signed-off-by: Jakub Kicinski --- drivers/net/rionet.c | 14 ++++++++------ drivers/net/sb1000.c | 12 ++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 2056d6ad04b5..1a95f3beb784 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -482,6 +482,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) { int rc = 0; struct rionet_private *rnet; + u8 addr[ETH_ALEN]; u16 device_id; const size_t rionet_active_bytes = sizeof(void *) * RIO_MAX_ROUTE_ENTRIES(mport->sys_size); @@ -501,12 +502,13 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) /* Set the default MAC address */ device_id = rio_local_get_device_id(mport); - ndev->dev_addr[0] = 0x00; - ndev->dev_addr[1] = 0x01; - ndev->dev_addr[2] = 0x00; - ndev->dev_addr[3] = 0x01; - ndev->dev_addr[4] = device_id >> 8; - ndev->dev_addr[5] = device_id & 0xff; + addr[0] = 0x00; + addr[1] = 0x01; + addr[2] = 0x00; + addr[3] = 0x01; + addr[4] = device_id >> 8; + addr[5] = device_id & 0xff; + eth_hw_addr_set(ndev, addr); ndev->netdev_ops = &rionet_netdev_ops; ndev->mtu = RIONET_MAX_MTU; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index f01c9db01b16..57a6d598467b 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -149,6 +149,7 @@ sb1000_probe_one(struct pnp_dev *pdev, const struct pnp_device_id *id) unsigned short ioaddr[2], irq; unsigned int serial_number; int error = -ENODEV; + u8 addr[ETH_ALEN]; if (pnp_device_attach(pdev) < 0) return -ENODEV; @@ -203,10 +204,13 @@ sb1000_probe_one(struct pnp_dev *pdev, const struct pnp_device_id *id) dev->netdev_ops = &sb1000_netdev_ops; /* hardware address is 0:0:serial_number */ - dev->dev_addr[2] = serial_number >> 24 & 0xff; - dev->dev_addr[3] = serial_number >> 16 & 0xff; - dev->dev_addr[4] = serial_number >> 8 & 0xff; - dev->dev_addr[5] = serial_number >> 0 & 0xff; + addr[0] = 0; + addr[1] = 0; + addr[2] = serial_number >> 24 & 0xff; + addr[3] = serial_number >> 16 & 0xff; + addr[4] = serial_number >> 8 & 0xff; + addr[5] = serial_number >> 0 & 0xff; + eth_hw_addr_set(dev, addr); pnp_set_drvdata(pdev, dev); From 65a4fbbf22636af5dd110fc272fac6b0d84e42fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Oct 2021 06:12:14 -0700 Subject: [PATCH 440/440] net: hldc_fr: use dev_addr_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Acked-by: Krzysztof Hałasa Signed-off-by: Jakub Kicinski --- drivers/net/wan/hdlc_fr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 7637edce443e..81e72bc1891f 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1093,7 +1093,9 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) dev->priv_flags &= ~IFF_TX_SKB_SHARING; eth_hw_addr_random(dev); } else { - *(__be16 *)dev->dev_addr = htons(dlci); + __be16 addr = htons(dlci); + + dev_addr_set(dev, (u8 *)&addr); dlci_to_q922(dev->broadcast, dlci); } dev->netdev_ops = &pvc_ops;