From 195bb48fccdef4965a65579ef05db8a8fcba8dca Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Oct 2021 11:31:03 -0700 Subject: [PATCH 01/38] ice: support for indirect notification Implement indirect notification mechanism to support offloading TC rules on tunnel devices. Keep indirect block list in netdev priv. Notification will call setting tc cls flower function. For now we can offload only ingress type. Return not supported for other flow block binder. Signed-off-by: Michal Swiatkowski Acked-by: Paul Menzel Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 8 + drivers/net/ethernet/intel/ice/ice_main.c | 188 +++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 6 + 3 files changed, 200 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 967a90efcb11..763add09559c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -610,6 +611,13 @@ struct ice_pf { struct ice_netdev_priv { struct ice_vsi *vsi; struct ice_repr *repr; + /* indirect block callbacks on registered higher level devices + * (e.g. 
tunnel devices) + * + * tc_indr_block_priv_list is used to look up indirect callback + * private data + */ + struct list_head tc_indr_block_priv_list; }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9ba22778011d..e6a8a07d30e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -58,6 +58,12 @@ static void ice_vsi_release_all(struct ice_pf *pf); static int ice_rebuild_channels(struct ice_pf *pf); static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr); +static int +ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, + void *cb_priv, enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); + bool netif_is_ice(struct net_device *dev) { return dev && (dev->netdev_ops == &ice_netdev_ops); @@ -3393,6 +3399,63 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, return ret; } +/** + * ice_rep_indr_tc_block_unbind + * @cb_priv: indirection block private data + */ +static void ice_rep_indr_tc_block_unbind(void *cb_priv) +{ + struct ice_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +/** + * ice_tc_indir_block_unregister - Unregister TC indirect block notifications + * @vsi: VSI struct which has the netdev + */ +static void ice_tc_indir_block_unregister(struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np = netdev_priv(vsi->netdev); + + flow_indr_dev_unregister(ice_indr_setup_tc_cb, np, + ice_rep_indr_tc_block_unbind); +} + +/** + * ice_tc_indir_block_remove - clean indirect TC block notifications + * @pf: PF structure + */ +static void ice_tc_indir_block_remove(struct ice_pf *pf) +{ + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + + if (!pf_vsi) + return; + + ice_tc_indir_block_unregister(pf_vsi); +} + +/** + * ice_tc_indir_block_register - Register TC indirect block notifications + * @vsi: VSI
struct which has the netdev + * + * Returns 0 on success, negative value on failure + */ +static int ice_tc_indir_block_register(struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np; + + if (!vsi || !vsi->netdev) + return -EINVAL; + + np = netdev_priv(vsi->netdev); + + INIT_LIST_HEAD(&np->tc_indr_block_priv_list); + return flow_indr_dev_register(ice_indr_setup_tc_cb, np); +} + /** * ice_setup_pf_sw - Setup the HW switch on startup or after reset * @pf: board private structure @@ -3401,6 +3464,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, */ static int ice_setup_pf_sw(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi; int status = 0; @@ -3422,6 +3486,13 @@ static int ice_setup_pf_sw(struct ice_pf *pf) /* netdev has to be configured before setting frame size */ ice_vsi_cfg_frame_size(vsi); + /* init indirect block notifications */ + status = ice_tc_indir_block_register(vsi); + if (status) { + dev_err(dev, "Failed to register netdev notifier\n"); + goto unroll_cfg_netdev; + } + /* Setup DCB netlink interface */ ice_dcbnl_setup(vsi); @@ -3433,7 +3504,7 @@ static int ice_setup_pf_sw(struct ice_pf *pf) status = ice_set_cpu_rx_rmap(vsi); if (status) { - dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n", + dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", vsi->vsi_num, status); status = -EINVAL; goto unroll_napi_add; @@ -3446,8 +3517,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf) free_cpu_rx_map: ice_free_cpu_rx_rmap(vsi); - unroll_napi_add: + ice_tc_indir_block_unregister(vsi); +unroll_cfg_netdev: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { @@ -4721,6 +4793,8 @@ static void ice_remove(struct pci_dev *pdev) msleep(100); } + ice_tc_indir_block_remove(pf); + if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); @@ -8155,6 +8229,116 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, return 
-EOPNOTSUPP; } +static struct ice_indr_block_priv * +ice_indr_block_priv_lookup(struct ice_netdev_priv *np, + struct net_device *netdev) +{ + struct ice_indr_block_priv *cb_priv; + + list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) { + if (!cb_priv->netdev) + return NULL; + if (cb_priv->netdev == netdev) + return cb_priv; + } + return NULL; +} + +static int +ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data, + void *indr_priv) +{ + struct ice_indr_block_priv *priv = indr_priv; + struct ice_netdev_priv *np = priv->np; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, priv->netdev, + (struct flow_cls_offload *) + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int +ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch, + struct ice_netdev_priv *np, + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct ice_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_BLOCK_BIND: + indr_priv = ice_indr_block_priv_lookup(np, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->np = np; + list_add(&indr_priv->list, &np->tc_indr_block_priv_list); + + block_cb = + flow_indr_block_cb_alloc(ice_indr_setup_block_cb, + indr_priv, indr_priv, + ice_rep_indr_tc_block_unbind, + f, netdev, sch, data, np, + cleanup); + + if (IS_ERR(block_cb)) { + list_del(&indr_priv->list); + kfree(indr_priv); + return PTR_ERR(block_cb); + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &ice_block_cb_list); + break; + case FLOW_BLOCK_UNBIND: + indr_priv = ice_indr_block_priv_lookup(np, netdev); + if (!indr_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, + 
ice_indr_setup_block_cb, + indr_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + + list_del(&block_cb->driver_list); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, + void *cb_priv, enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + switch (type) { + case TC_SETUP_BLOCK: + return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data, + data, cleanup); + + default: + return -EOPNOTSUPP; + } +} + /** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index ee9b284fcc02..99ececeef445 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -23,6 +23,12 @@ #define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16) #define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) +struct ice_indr_block_priv { + struct net_device *netdev; + struct ice_netdev_priv *np; + struct list_head list; +}; + struct ice_tc_flower_action { u32 tc_class; enum ice_sw_fwd_act_type fltr_act; From 9e300987d4a81fb95c323f042dd5aa484f4eb3dd Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Oct 2021 11:31:04 -0700 Subject: [PATCH 02/38] ice: VXLAN and Geneve TC support Add definition for VXLAN and Geneve dummy packet. Define VXLAN and Geneve type of fields to match on correct UDP tunnel header. Parse tunnel specific fields from TC tool like outer MACs, outer IPs, outer destination port and VNI. Save values and masks in outer header struct and move header pointer to inner to simplify parsing inner values. 
There are two cases for redirect action: - from uplink to VF - TC filter is added on tunnel device - from VF to uplink - TC filter is added on PR, for this case check if redirect device is tunnel device VXLAN example: - create tunnel device ip l add $VXLAN_DEV type vxlan id $VXLAN_VNI dstport $VXLAN_PORT \ dev $PF - add TC filter (in switchdev mode) tc filter add dev $VXLAN_DEV protocol ip parent ffff: flower \ enc_dst_ip $VF1_IP enc_key_id $VXLAN_VNI action mirred egress \ redirect dev $VF1_PR Geneve example: - create tunnel device ip l add $GENEVE_DEV type geneve id $GENEVE_VNI dstport $GENEVE_PORT \ remote $GENEVE_IP - add TC filter (in switchdev mode) tc filter add dev $GENEVE_DEV protocol ip parent ffff: flower \ enc_key_id $GENEVE_VNI dst_ip $GENEVE1_IP action mirred egress \ redirect dev $VF1_PR Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 5 + .../ethernet/intel/ice/ice_protocol_type.h | 16 + drivers/net/ethernet/intel/ice/ice_switch.h | 1 + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 380 ++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 4 + 5 files changed, 362 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e6a8a07d30e5..2ebbbe1edd82 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8270,6 +8270,11 @@ ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch, struct ice_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; + if (!ice_is_tunnel_supported(netdev) && + !(is_vlan_dev(netdev) && + vlan_dev_real_dev(netdev) == np->vsi->netdev)) + return -EOPNOTSUPP; + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 
0b220dfa7457..d717d1158545 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -37,10 +37,19 @@ enum ice_protocol_type { ICE_TCP_IL, ICE_UDP_OF, ICE_UDP_ILOS, + ICE_VXLAN, + ICE_GENEVE, + ICE_VXLAN_GPE, ICE_SCTP_IL, ICE_PROTOCOL_LAST }; +enum ice_sw_tunnel_type { + ICE_NON_TUN = 0, + ICE_SW_TUN_VXLAN, + ICE_SW_TUN_GENEVE, +}; + /* Decoders for ice_prot_id: * - F: First * - I: Inner @@ -152,6 +161,12 @@ struct ice_l4_hdr { __be16 check; }; +struct ice_udp_tnl_hdr { + __be16 field; + __be16 proto_type; + __be32 vni; /* only use lower 24-bits */ +}; + union ice_prot_hdr { struct ice_ether_hdr eth_hdr; struct ice_ethtype_hdr ethertype; @@ -160,6 +175,7 @@ union ice_prot_hdr { struct ice_ipv6_hdr ipv6_hdr; struct ice_l4_hdr l4_hdr; struct ice_sctp_hdr sctp_hdr; + struct ice_udp_tnl_hdr tnl_hdr; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index c4dd2062c469..7d661c9be81b 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -171,6 +171,7 @@ struct ice_adv_rule_flags_info { }; struct ice_adv_rule_info { + enum ice_sw_tunnel_type tun_type; struct ice_sw_act_ctrl sw_act; u32 priority; u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 725caa160b13..920d9024a6c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -3,8 +3,9 @@ #include "ice.h" #include "ice_tc_lib.h" -#include "ice_lib.h" #include "ice_fltr.h" +#include "ice_lib.h" +#include "ice_protocol_type.h" /** * ice_tc_count_lkups - determine lookup count for switch filter @@ -20,7 +21,21 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, { int lkups_cnt = 0; - if 
(flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) + if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) + lkups_cnt++; + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) + lkups_cnt++; + + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) + lkups_cnt++; + + /* currently inner etype filter isn't supported */ + if ((flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) && + fltr->tunnel_type == TNL_LAST) lkups_cnt++; /* are MAC fields specified? */ @@ -32,10 +47,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, lkups_cnt++; /* are IPv[4|6] fields specified? */ - if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4)) - lkups_cnt++; - else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 | - ICE_TC_FLWR_FIELD_SRC_IPV6)) + if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 | + ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6)) lkups_cnt++; /* is L4 (TCP/UDP/any other L4 protocol fields) specified? */ @@ -46,6 +59,132 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, return lkups_cnt; } +static enum ice_protocol_type ice_proto_type_from_mac(bool inner) +{ + return inner ? ICE_MAC_IL : ICE_MAC_OFOS; +} + +static enum ice_protocol_type ice_proto_type_from_ipv4(bool inner) +{ + return inner ? ICE_IPV4_IL : ICE_IPV4_OFOS; +} + +static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner) +{ + return inner ? 
ICE_IPV6_IL : ICE_IPV6_OFOS; +} + +static enum ice_protocol_type +ice_proto_type_from_l4_port(bool inner, u16 ip_proto) +{ + if (inner) { + switch (ip_proto) { + case IPPROTO_UDP: + return ICE_UDP_ILOS; + } + } else { + switch (ip_proto) { + case IPPROTO_TCP: + return ICE_TCP_IL; + case IPPROTO_UDP: + return ICE_UDP_OF; + } + } + + return 0; +} + +static enum ice_protocol_type +ice_proto_type_from_tunnel(enum ice_tunnel_type type) +{ + switch (type) { + case TNL_VXLAN: + return ICE_VXLAN; + case TNL_GENEVE: + return ICE_GENEVE; + default: + return 0; + } +} + +static enum ice_sw_tunnel_type +ice_sw_type_from_tunnel(enum ice_tunnel_type type) +{ + switch (type) { + case TNL_VXLAN: + return ICE_SW_TUN_VXLAN; + case TNL_GENEVE: + return ICE_SW_TUN_GENEVE; + default: + return ICE_NON_TUN; + } +} + +static int +ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, + struct ice_adv_lkup_elem *list) +{ + struct ice_tc_flower_lyr_2_4_hdrs *hdr = &fltr->outer_headers; + int i = 0; + + if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) { + u32 tenant_id; + + list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type); + tenant_id = be32_to_cpu(fltr->tenant_id) << 8; + list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id); + memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4); + i++; + } + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) { + list[i].type = ice_proto_type_from_ipv4(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV4) { + list[i].h_u.ipv4_hdr.src_addr = hdr->l3_key.src_ipv4; + list[i].m_u.ipv4_hdr.src_addr = hdr->l3_mask.src_ipv4; + } + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV4) { + list[i].h_u.ipv4_hdr.dst_addr = hdr->l3_key.dst_ipv4; + list[i].m_u.ipv4_hdr.dst_addr = hdr->l3_mask.dst_ipv4; + } + i++; + } + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) { + list[i].type = ice_proto_type_from_ipv6(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV6) { + 
memcpy(&list[i].h_u.ipv6_hdr.src_addr, + &hdr->l3_key.src_ipv6_addr, + sizeof(hdr->l3_key.src_ipv6_addr)); + memcpy(&list[i].m_u.ipv6_hdr.src_addr, + &hdr->l3_mask.src_ipv6_addr, + sizeof(hdr->l3_mask.src_ipv6_addr)); + } + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV6) { + memcpy(&list[i].h_u.ipv6_hdr.dst_addr, + &hdr->l3_key.dst_ipv6_addr, + sizeof(hdr->l3_key.dst_ipv6_addr)); + memcpy(&list[i].m_u.ipv6_hdr.dst_addr, + &hdr->l3_mask.dst_ipv6_addr, + sizeof(hdr->l3_mask.dst_ipv6_addr)); + } + i++; + } + + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) { + list[i].type = ice_proto_type_from_l4_port(false, hdr->l3_key.ip_proto); + list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port; + list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port; + i++; + } + + return i; +} + /** * ice_tc_fill_rules - fill filter rules based on TC fltr * @hw: pointer to HW structure @@ -67,9 +206,16 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, u16 *l4_proto) { struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + bool inner = false; int i = 0; - if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) { + rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type); + if (tc_fltr->tunnel_type != TNL_LAST) { + i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list); + + headers = &tc_fltr->inner_headers; + inner = true; + } else if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) { list[i].type = ICE_ETYPE_OL; list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto; list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto; @@ -83,7 +229,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, l2_key = &headers->l2_key; l2_mask = &headers->l2_mask; - list[i].type = ICE_MAC_OFOS; + list[i].type = ice_proto_type_from_mac(inner); if (flags & ICE_TC_FLWR_FIELD_DST_MAC) { ether_addr_copy(list[i].h_u.eth_hdr.dst_addr, l2_key->dst_mac); @@ -112,7 +258,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, ICE_TC_FLWR_FIELD_SRC_IPV4)) { struct ice_tc_l3_hdr *l3_key, *l3_mask; - list[i].type = 
ICE_IPV4_OFOS; + list[i].type = ice_proto_type_from_ipv4(inner); l3_key = &headers->l3_key; l3_mask = &headers->l3_mask; if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) { @@ -129,7 +275,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask; struct ice_tc_l3_hdr *l3_key, *l3_mask; - list[i].type = ICE_IPV6_OFOS; + list[i].type = ice_proto_type_from_ipv6(inner); ipv6_hdr = &list[i].h_u.ipv6_hdr; ipv6_mask = &list[i].m_u.ipv6_hdr; l3_key = &headers->l3_key; @@ -155,19 +301,10 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { struct ice_tc_l4_hdr *l4_key, *l4_mask; + list[i].type = ice_proto_type_from_l4_port(inner, headers->l3_key.ip_proto); l4_key = &headers->l4_key; l4_mask = &headers->l4_mask; - if (headers->l3_key.ip_proto == IPPROTO_TCP) { - list[i].type = ICE_TCP_IL; - /* detected L4 proto is TCP */ - if (l4_proto) - *l4_proto = IPPROTO_TCP; - } else if (headers->l3_key.ip_proto == IPPROTO_UDP) { - list[i].type = ICE_UDP_ILOS; - /* detected L4 proto is UDP */ - if (l4_proto) - *l4_proto = IPPROTO_UDP; - } + if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) { list[i].h_u.l4_hdr.dst_port = l4_key->dst_port; list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port; @@ -182,6 +319,27 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, return i; } +/** + * ice_tc_tun_get_type - get the tunnel type + * @tunnel_dev: ptr to tunnel device + * + * This function detects appropriate tunnel_type if specified device is + * tunnel device such as VXLAN/Geneve + */ +static int ice_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return TNL_VXLAN; + if (netif_is_geneve(tunnel_dev)) + return TNL_GENEVE; + return TNL_LAST; +} + +bool ice_is_tunnel_supported(struct net_device *dev) +{ + return ice_tc_tun_get_type(dev) != TNL_LAST; +} + static int ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr, struct flow_action_entry *act) @@ -201,10 +359,8 @@ ice_eswitch_tc_parse_action(struct 
ice_tc_flower_fltr *fltr, fltr->dest_vsi = repr->src_vsi; fltr->direction = ICE_ESWITCH_FLTR_INGRESS; - } else if (netif_is_ice(act->dev)) { - struct ice_netdev_priv *np = netdev_priv(act->dev); - - fltr->dest_vsi = np->vsi; + } else if (netif_is_ice(act->dev) || + ice_is_tunnel_supported(act->dev)) { fltr->direction = ICE_ESWITCH_FLTR_EGRESS; } else { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode"); @@ -235,11 +391,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) int ret = 0; int i; - if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | - ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | - ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | - ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | - ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) { + if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); return -EOPNOTSUPP; } @@ -255,6 +407,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) goto exit; } + /* egress traffic is always redirected to uplink */ + if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) + fltr->dest_vsi = vsi->back->switchdev.uplink_vsi; + rule_info.sw_act.fltr_act = fltr->action.fltr_act; if (fltr->action.fltr_act != ICE_DROP_PACKET) rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx; @@ -438,19 +594,26 @@ exit: * @match: Pointer to flow match structure * @fltr: Pointer to filter structure * @headers: inner or outer header fields + * @is_encap: set true for tunnel IPv4 address */ static int ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match, struct ice_tc_flower_fltr *fltr, - struct ice_tc_flower_lyr_2_4_hdrs *headers) + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) { if (match->key->dst) { - fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4; + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV4; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4; headers->l3_key.dst_ipv4 = match->key->dst; headers->l3_mask.dst_ipv4 =
match->mask->dst; } if (match->key->src) { - fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4; + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV4; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4; headers->l3_key.src_ipv4 = match->key->src; headers->l3_mask.src_ipv4 = match->mask->src; } @@ -462,11 +625,12 @@ ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match, * @match: Pointer to flow match structure * @fltr: Pointer to filter structure * @headers: inner or outer header fields + * @is_encap: set true for tunnel IPv6 address */ static int ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, struct ice_tc_flower_fltr *fltr, - struct ice_tc_flower_lyr_2_4_hdrs *headers) + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) { struct ice_tc_l3_hdr *l3_key, *l3_mask; @@ -484,21 +648,31 @@ ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any"); return -EINVAL; } - if (!ipv6_addr_any(&match->mask->dst)) - fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6; - if (!ipv6_addr_any(&match->mask->src)) - fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6; + if (!ipv6_addr_any(&match->mask->dst)) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV6; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6; + } + if (!ipv6_addr_any(&match->mask->src)) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV6; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6; + } l3_key = &headers->l3_key; l3_mask = &headers->l3_mask; - if (fltr->flags & ICE_TC_FLWR_FIELD_SRC_IPV6) { + if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_SRC_IPV6)) { memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr, sizeof(match->key->src.s6_addr)); memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr, sizeof(match->mask->src.s6_addr)); } - if (fltr->flags & ICE_TC_FLWR_FIELD_DEST_IPV6) { + if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_DEST_IPV6)) { 
memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr, sizeof(match->key->dst.s6_addr)); memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr, @@ -513,18 +687,27 @@ ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, * @match: Flow match structure * @fltr: Pointer to filter structure * @headers: inner or outer header fields + * @is_encap: set true for tunnel port */ static int ice_tc_set_port(struct flow_match_ports match, struct ice_tc_flower_fltr *fltr, - struct ice_tc_flower_lyr_2_4_hdrs *headers) + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) { if (match.key->dst) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; headers->l4_key.dst_port = match.key->dst; headers->l4_mask.dst_port = match.mask->dst; } if (match.key->src) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; headers->l4_key.src_port = match.key->src; headers->l4_mask.src_port = match.mask->src; @@ -532,6 +715,85 @@ ice_tc_set_port(struct flow_match_ports match, return 0; } +static struct net_device * +ice_get_tunnel_device(struct net_device *dev, struct flow_rule *rule) +{ + struct flow_action_entry *act; + int i; + + if (ice_is_tunnel_supported(dev)) + return dev; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_REDIRECT && + ice_is_tunnel_supported(act->dev)) + return act->dev; + } + + return NULL; +} + +static int +ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, + struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct flow_match_control enc_control; + + fltr->tunnel_type = ice_tc_tun_get_type(dev); + headers->l3_key.ip_proto = IPPROTO_UDP; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct 
flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid || + enc_keyid.mask->keyid != cpu_to_be32(ICE_TC_FLOWER_MASK_32)) + return -EINVAL; + + fltr->flags |= ICE_TC_FLWR_FIELD_TENANT_ID; + fltr->tenant_id = enc_keyid.key->keyid; + } + + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + if (ice_tc_set_ipv4(&match, fltr, headers, true)) + return -EINVAL; + } else if (enc_control.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + if (ice_tc_set_ipv6(&match, fltr, headers, true)) + return -EINVAL; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + headers->l3_key.tos = match.key->tos; + headers->l3_key.ttl = match.key->ttl; + headers->l3_mask.tos = match.mask->tos; + headers->l3_mask.ttl = match.mask->ttl; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_enc_ports(rule, &match); + if (ice_tc_set_port(match, fltr, headers, true)) + return -EINVAL; + } + + return 0; +} + /** * ice_parse_cls_flower - Parse TC flower filters provided by kernel * @vsi: Pointer to the VSI @@ -559,12 +821,42 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used"); return -EOPNOTSUPP; } + if 
((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))) { + int err; + + filter_dev = ice_get_tunnel_device(filter_dev, rule); + if (!filter_dev) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel device not found"); + return -EOPNOTSUPP; + } + + err = ice_parse_tunnel_attr(filter_dev, rule, fltr); + if (err) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to parse TC flower tunnel attributes"); + return err; + } + + /* header pointers should point to the inner headers, outer + * headers were already set by ice_parse_tunnel_attr + */ + headers = &fltr->inner_headers; + } else { + fltr->tunnel_type = TNL_LAST; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -651,7 +943,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, struct flow_match_ipv4_addrs match; flow_rule_match_ipv4_addrs(rule, &match); - if (ice_tc_set_ipv4(&match, fltr, headers)) + if (ice_tc_set_ipv4(&match, fltr, headers, false)) return -EINVAL; } @@ -659,7 +951,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, struct flow_match_ipv6_addrs match; flow_rule_match_ipv6_addrs(rule, &match); - if (ice_tc_set_ipv6(&match, fltr, headers)) + if (ice_tc_set_ipv6(&match, fltr, headers, false)) return -EINVAL; } @@ -667,7 +959,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, struct flow_match_ports match; flow_rule_match_ports(rule, &match); - if (ice_tc_set_port(match, fltr, headers)) + if (ice_tc_set_port(match, fltr, headers, false)) return -EINVAL; switch (headers->l3_key.ip_proto) { case IPPROTO_TCP: diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 99ececeef445..319049477959 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -23,6 +23,8 @@ #define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16) #define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) +#define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF + struct ice_indr_block_priv { struct net_device *netdev; struct ice_netdev_priv *np; @@ -118,6 +120,7 @@ struct ice_tc_flower_fltr { struct ice_vsi *src_vsi; __be32 tenant_id; u32 flags; + u8 tunnel_type; struct ice_tc_flower_action action; /* cache ptr which is used wherever needed to communicate netlink @@ -154,5 +157,6 @@ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, int ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); void ice_replay_tc_fltrs(struct ice_pf *pf); +bool ice_is_tunnel_supported(struct net_device *dev); #endif /* _ICE_TC_LIB_H_ */ From 8b032a55c1bd5d47527263445aba9dc45144b00d Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Oct 2021 11:31:05 -0700 Subject: [PATCH 03/38] ice: low level support for tunnels Add definition of UDP tunnel dummy packets. Fill destination port value in filter based on UDP tunnel port. Append tunnel flags to switch filter definition in case of matching the tunnel. Both VXLAN and Geneve are UDP tunnels, so only one new header is needed. 
Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_flex_pipe.c | 26 +- .../net/ethernet/intel/ice/ice_flex_type.h | 2 + .../ethernet/intel/ice/ice_protocol_type.h | 9 + drivers/net/ethernet/intel/ice/ice_switch.c | 275 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_switch.h | 2 + 5 files changed, 298 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index e731b46270c3..8736d3ae230f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1565,6 +1565,26 @@ static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw) return bld; } +/** + * ice_get_sw_prof_type - determine switch profile type + * @hw: pointer to the HW structure + * @fv: pointer to the switch field vector + */ +static enum ice_prof_type +ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv) +{ + u16 i; + + for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) { + /* UDP tunnel will have UDP_OF protocol ID and VNI offset */ + if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF && + fv->ew[i].off == ICE_VNI_OFFSET) + return ICE_PROF_TUN_UDP; + } + + return ICE_PROF_NON_TUN; +} + /** * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type * @hw: pointer to hardware structure @@ -1588,6 +1608,7 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs, bitmap_zero(bm, ICE_MAX_NUM_PROFILES); ice_seg = hw->seg; do { + enum ice_prof_type prof_type; u32 offset; fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, @@ -1595,7 +1616,10 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs, ice_seg = NULL; if (fv) { - if (req_profs & ICE_PROF_NON_TUN) + /* Determine field vector type */ + prof_type = ice_get_sw_prof_type(hw, fv); + + if (req_profs & prof_type) set_bit((u16)offset, bm); } } while (fv); diff --git 
a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 120bcebaa080..07d3795d2b10 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -614,6 +614,8 @@ struct ice_chs_chg { enum ice_prof_type { ICE_PROF_NON_TUN = 0x1, + ICE_PROF_TUN_UDP = 0x2, + ICE_PROF_TUN_ALL = 0x6, ICE_PROF_ALL = 0xFF, }; #endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index d717d1158545..8309ecaa771c 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -48,6 +48,7 @@ enum ice_sw_tunnel_type { ICE_NON_TUN = 0, ICE_SW_TUN_VXLAN, ICE_SW_TUN_GENEVE, + ICE_ALL_TUNNELS /* All tunnel types */ }; /* Decoders for ice_prot_id: @@ -83,6 +84,8 @@ enum ice_prot_id { ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; +#define ICE_VNI_OFFSET 12 /* offset of VNI from ICE_PROT_UDP_OF */ + #define ICE_MAC_OFOS_HW 1 #define ICE_MAC_IL_HW 4 #define ICE_ETYPE_OL_HW 9 @@ -96,6 +99,12 @@ enum ice_prot_id { #define ICE_UDP_ILOS_HW 53 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ +#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel type */ + +#define ICE_MDID_SIZE 2 +#define ICE_TUN_FLAG_MDID 21 +#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID) +#define ICE_TUN_FLAG_MASK 0xFF #define ICE_TUN_FLAG_FV_IND 2 diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2742e1c1e337..a2dfe8e3d3fa 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -35,6 +35,105 @@ struct ice_dummy_pkt_offsets { u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */ }; +static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, 
+ { ICE_UDP_OF, 34 }, + { ICE_VXLAN, 42 }, + { ICE_GENEVE, 42 }, + { ICE_VXLAN_GPE, 42 }, + { ICE_MAC_IL, 50 }, + { ICE_IPV4_IL, 64 }, + { ICE_TCP_IL, 84 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_udp_tun_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x40, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */ + 0x00, 0x46, 0x00, 0x00, + + 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_IL 64 */ + 0x00, 0x01, 0x00, 0x00, + 0x40, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 84 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_OF, 34 }, + { ICE_VXLAN, 42 }, + { ICE_GENEVE, 42 }, + { ICE_VXLAN_GPE, 42 }, + { ICE_MAC_IL, 50 }, + { ICE_IPV4_IL, 64 }, + { ICE_UDP_ILOS, 84 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_udp_tun_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x4e, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */ + 0x00, 0x3a, 0x00, 0x00, + + 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */ + 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_IL 64 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 84 */ + 0x00, 0x08, 0x00, 0x00, +}; + /* offset info for MAC + IPv4 + UDP dummy packet */ static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = { { ICE_MAC_OFOS, 0 }, @@ -3582,6 +3681,8 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { { ICE_TCP_IL, { 0, 2 } }, { ICE_UDP_OF, { 0, 2 } }, { ICE_UDP_ILOS, { 0, 2 } }, + { ICE_VXLAN, { 8, 10, 12, 14 } }, + { ICE_GENEVE, { 8, 10, 12, 14 } }, }; static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { @@ -3596,6 +3697,8 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_TCP_IL, ICE_TCP_IL_HW }, { ICE_UDP_OF, ICE_UDP_OF_HW }, { ICE_UDP_ILOS, ICE_UDP_ILOS_HW }, + { ICE_VXLAN, ICE_UDP_OF_HW }, + { ICE_GENEVE, ICE_UDP_OF_HW }, }; /** @@ -3915,12 +4018,11 @@ ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles, * ice_add_sw_recipe - function to call AQ calls to create switch recipe * @hw: pointer to hardware structure * @rm: recipe management list entry - * @match_tun_mask: tunnel mask that needs to be programmed * @profiles: bitmap of profiles that will be associated. */ static enum ice_status ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, - u16 match_tun_mask, unsigned long *profiles) + unsigned long *profiles) { DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS); struct ice_aqc_recipe_data_elem *tmp; @@ -4128,15 +4230,6 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, } buf[recps].content.act_ctrl_fwd_priority = rm->priority; - /* To differentiate among different UDP tunnels, a meta data ID - * flag is used. 
- */ - if (match_tun_mask) { - buf[recps].content.lkup_indx[i] = ICE_TUN_FLAG_FV_IND; - buf[recps].content.mask[i] = - cpu_to_le16(match_tun_mask); - } - recps++; rm->root_rid = (u8)rid; } @@ -4199,6 +4292,7 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, recp->chain_idx = entry->chain_idx; recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority; recp->n_grp_count = rm->n_grp_count; + recp->tun_type = rm->tun_type; recp->recp_created = true; } rm->root_buf = buf; @@ -4279,6 +4373,54 @@ free_mem: return status; } +/** + * ice_tun_type_match_word - determine if tun type needs a match mask + * @tun_type: tunnel type + * @mask: mask to be used for the tunnel + */ +static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) +{ + switch (tun_type) { + case ICE_SW_TUN_GENEVE: + case ICE_SW_TUN_VXLAN: + *mask = ICE_TUN_FLAG_MASK; + return true; + + default: + *mask = 0; + return false; + } +} + +/** + * ice_add_special_words - Add words that are not protocols, such as metadata + * @rinfo: other information regarding the rule e.g. priority and action info + * @lkup_exts: lookup word structure + */ +static enum ice_status +ice_add_special_words(struct ice_adv_rule_info *rinfo, + struct ice_prot_lkup_ext *lkup_exts) +{ + u16 mask; + + /* If this is a tunneled packet, then add recipe index to match the + * tunnel bit in the packet metadata flags. + */ + if (ice_tun_type_match_word(rinfo->tun_type, &mask)) { + if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { + u8 word = lkup_exts->n_val_words++; + + lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; + lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF; + lkup_exts->field_mask[word] = mask; + } else { + return ICE_ERR_MAX_LIMIT; + } + } + + return 0; +} + /* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule * @hw: pointer to hardware structure * @rinfo: other information regarding the rule e.g. 
priority and action info @@ -4288,9 +4430,27 @@ static void ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, unsigned long *bm) { + enum ice_prof_type prof_type; + bitmap_zero(bm, ICE_MAX_NUM_PROFILES); - ice_get_sw_fv_bitmap(hw, ICE_PROF_NON_TUN, bm); + switch (rinfo->tun_type) { + case ICE_NON_TUN: + prof_type = ICE_PROF_NON_TUN; + break; + case ICE_ALL_TUNNELS: + prof_type = ICE_PROF_TUN_ALL; + break; + case ICE_SW_TUN_GENEVE: + case ICE_SW_TUN_VXLAN: + prof_type = ICE_PROF_TUN_UDP; + break; + default: + prof_type = ICE_PROF_ALL; + break; + } + + ice_get_sw_fv_bitmap(hw, prof_type, bm); } /** @@ -4315,7 +4475,6 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, struct ice_sw_fv_list_entry *tmp; enum ice_status status = 0; struct ice_sw_recipe *rm; - u16 match_tun_mask = 0; u8 i; if (!lkups_cnt) @@ -4365,6 +4524,13 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_unroll; + /* Create any special protocol/offset pairs, such as looking at tunnel + * bits by extracting metadata + */ + status = ice_add_special_words(rinfo, lkup_exts); + if (status) + goto err_free_lkup_exts; + /* Group match words into recipes using preferred recipe grouping * criteria. 
*/ @@ -4396,7 +4562,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, goto err_unroll; /* Recipe we need does not exist, add a recipe */ - status = ice_add_sw_recipe(hw, rm, match_tun_mask, profiles); + status = ice_add_sw_recipe(hw, rm, profiles); if (status) goto err_unroll; @@ -4466,12 +4632,14 @@ err_free_lkup_exts: * @lkups: lookup elements or match criteria for the advanced recipe, one * structure per protocol header * @lkups_cnt: number of protocols + * @tun_type: tunnel type * @pkt: dummy packet to fill according to filter match criteria * @pkt_len: packet length of dummy packet * @offsets: pointer to receive the pointer to the offsets for the packet */ static void ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + enum ice_sw_tunnel_type tun_type, const u8 **pkt, u16 *pkt_len, const struct ice_dummy_pkt_offsets **offsets) { @@ -4495,6 +4663,21 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, ipv6 = true; } + if (tun_type == ICE_SW_TUN_VXLAN || + tun_type == ICE_SW_TUN_GENEVE) { + if (tcp) { + *pkt = dummy_udp_tun_tcp_packet; + *pkt_len = sizeof(dummy_udp_tun_tcp_packet); + *offsets = dummy_udp_tun_tcp_packet_offsets; + return; + } + + *pkt = dummy_udp_tun_udp_packet; + *pkt_len = sizeof(dummy_udp_tun_udp_packet); + *offsets = dummy_udp_tun_udp_packet_offsets; + return; + } + if (udp && !ipv6) { if (vlan) { *pkt = dummy_vlan_udp_packet; @@ -4615,6 +4798,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_SCTP_IL: len = sizeof(struct ice_sctp_hdr); break; + case ICE_VXLAN: + case ICE_GENEVE: + len = sizeof(struct ice_udp_tnl_hdr); + break; default: return ICE_ERR_PARAM; } @@ -4644,6 +4831,48 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, return 0; } +/** + * ice_fill_adv_packet_tun - fill dummy packet with udp tunnel port + * @hw: pointer to the hardware structure + * @tun_type: tunnel type + * @pkt: dummy packet to fill 
in + * @offsets: offset info for the dummy packet + */ +static enum ice_status +ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, + u8 *pkt, const struct ice_dummy_pkt_offsets *offsets) +{ + u16 open_port, i; + + switch (tun_type) { + case ICE_SW_TUN_VXLAN: + case ICE_SW_TUN_GENEVE: + if (!ice_get_open_tunnel_port(hw, &open_port)) + return ICE_ERR_CFG; + break; + + default: + /* Nothing needs to be done for this tunnel type */ + return 0; + } + + /* Find the outer UDP protocol header and insert the port number */ + for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) { + if (offsets[i].type == ICE_UDP_OF) { + struct ice_l4_hdr *hdr; + u16 offset; + + offset = offsets[i].offset; + hdr = (struct ice_l4_hdr *)&pkt[offset]; + hdr->dst_port = cpu_to_be16(open_port); + + return 0; + } + } + + return ICE_ERR_CFG; +} + /** * ice_find_adv_rule_entry - Search a rule entry * @hw: pointer to the hardware structure @@ -4678,6 +4907,7 @@ ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, break; } if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag && + rinfo->tun_type == list_itr->rule_info.tun_type && lkups_matched) return list_itr; } @@ -4852,7 +5082,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, return ICE_ERR_PARAM; /* make sure that we can locate a dummy packet */ - ice_find_dummy_packet(lkups, lkups_cnt, &pkt, &pkt_len, + ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type, &pkt, &pkt_len, &pkt_offsets); if (!pkt) { status = ICE_ERR_PARAM; @@ -4963,6 +5193,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_ice_add_adv_rule; + if (rinfo->tun_type != ICE_NON_TUN) { + status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, + s_rule->pdata.lkup_tx_rx.hdr, + pkt_offsets); + if (status) + goto err_ice_add_adv_rule; + } + status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, rule_buf_sz, 1, ice_aqc_opc_add_sw_rules, NULL); @@ -5198,6 
+5436,13 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, return ICE_ERR_CFG; } + /* Create any special protocol/offset pairs, such as looking at tunnel + * bits by extracting metadata + */ + status = ice_add_special_words(rinfo, &lkup_exts); + if (status) + return status; + rid = ice_find_recp(hw, &lkup_exts); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 7d661c9be81b..d8a38906f16f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -212,6 +212,8 @@ struct ice_sw_recipe { /* Bit map specifying the IDs associated with this group of recipe */ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + enum ice_sw_tunnel_type tun_type; + /* List of type ice_fltr_mgmt_list_entry or adv_rule */ u8 adv_rule; struct list_head filt_rules; From f0a35040adbe72f6b2e9ddc9fefdbcdbe0b92c55 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Oct 2021 11:31:06 -0700 Subject: [PATCH 04/38] ice: support for GRE in eswitch Mostly reuse code from Geneve and VXLAN in TC parsing code. Add new GRE header to match on correct fields. Create new dummy packets with GRE fields. Instead of checking if any encap values are present in TC flower, check if device is tunnel type or redirect is to tunnel device. This will allow adding all combinations of rules. For example, filters only with inner fields. Return error in case device isn't tunnel but encap values are present.
gre example: - create tunnel device ip l add $NVGRE_DEV type gretap remote $NVGRE_REM_IP local $VF1_IP \ dev $PF - add tc filter (in switchdev mode) tc filter add dev $NVGRE_DEV protocol ip parent ffff: flower dst_ip \ $NVGRE1_IP action mirred egress redirect dev $VF1_PR Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_flex_pipe.c | 4 + .../net/ethernet/intel/ice/ice_flex_type.h | 2 + .../ethernet/intel/ice/ice_protocol_type.h | 12 +- drivers/net/ethernet/intel/ice/ice_switch.c | 110 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 47 +++++--- 5 files changed, 161 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 8736d3ae230f..23cfcceb1536 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1580,6 +1580,10 @@ ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv) if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF && fv->ew[i].off == ICE_VNI_OFFSET) return ICE_PROF_TUN_UDP; + + /* GRE tunnel will have GRE protocol */ + if (fv->ew[i].prot_id == (u8)ICE_PROT_GRE_OF) + return ICE_PROF_TUN_GRE; } return ICE_PROF_NON_TUN; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 07d3795d2b10..0f572a36d021 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -373,6 +373,7 @@ struct ice_pkg_enum { enum ice_tunnel_type { TNL_VXLAN = 0, TNL_GENEVE, + TNL_GRETAP, __TNL_TYPE_CNT, TNL_LAST = 0xFF, TNL_ALL = 0xFF, @@ -615,6 +616,7 @@ struct ice_chs_chg { enum ice_prof_type { ICE_PROF_NON_TUN = 0x1, ICE_PROF_TUN_UDP = 0x2, + ICE_PROF_TUN_GRE = 0x4, ICE_PROF_TUN_ALL = 0x6, ICE_PROF_ALL = 0xFF, }; diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h 
index 8309ecaa771c..dc1b0e9e6df5 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -39,6 +39,7 @@ enum ice_protocol_type { ICE_UDP_ILOS, ICE_VXLAN, ICE_GENEVE, + ICE_NVGRE, ICE_VXLAN_GPE, ICE_SCTP_IL, ICE_PROTOCOL_LAST @@ -48,7 +49,8 @@ enum ice_sw_tunnel_type { ICE_NON_TUN = 0, ICE_SW_TUN_VXLAN, ICE_SW_TUN_GENEVE, - ICE_ALL_TUNNELS /* All tunnel types */ + ICE_SW_TUN_NVGRE, + ICE_ALL_TUNNELS /* All tunnel types including NVGRE */ }; /* Decoders for ice_prot_id: @@ -97,6 +99,7 @@ enum ice_prot_id { #define ICE_IPV6_IL_HW 41 #define ICE_TCP_IL_HW 49 #define ICE_UDP_ILOS_HW 53 +#define ICE_GRE_OF_HW 64 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ #define ICE_META_DATA_ID_HW 255 /* this is used for tunnel type */ @@ -176,6 +179,12 @@ struct ice_udp_tnl_hdr { __be32 vni; /* only use lower 24-bits */ }; +struct ice_nvgre_hdr { + __be16 flags; + __be16 protocol; + __be32 tni_flow; +}; + union ice_prot_hdr { struct ice_ether_hdr eth_hdr; struct ice_ethtype_hdr ethertype; @@ -185,6 +194,7 @@ union ice_prot_hdr { struct ice_l4_hdr l4_hdr; struct ice_sctp_hdr sctp_hdr; struct ice_udp_tnl_hdr tnl_hdr; + struct ice_nvgre_hdr nvgre_hdr; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index a2dfe8e3d3fa..2af03b9845eb 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -35,6 +35,93 @@ struct ice_dummy_pkt_offsets { u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */ }; +static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_NVGRE, 34 }, + { ICE_MAC_IL, 42 }, + { ICE_IPV4_IL, 56 }, + { ICE_TCP_IL, 76 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_gre_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* 
ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x2F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 76 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_NVGRE, 34 }, + { ICE_MAC_IL, 42 }, + { ICE_IPV4_IL, 56 }, + { ICE_UDP_ILOS, 76 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_gre_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x2F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 76 */ + 0x00, 0x08, 0x00, 0x00, +}; + static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, @@ -3683,6 +3770,7 @@ static const struct ice_prot_ext_tbl_entry 
ice_prot_ext[ICE_PROTOCOL_LAST] = { { ICE_UDP_ILOS, { 0, 2 } }, { ICE_VXLAN, { 8, 10, 12, 14 } }, { ICE_GENEVE, { 8, 10, 12, 14 } }, + { ICE_NVGRE, { 0, 2, 4, 6 } }, }; static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { @@ -3699,6 +3787,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_UDP_ILOS, ICE_UDP_ILOS_HW }, { ICE_VXLAN, ICE_UDP_OF_HW }, { ICE_GENEVE, ICE_UDP_OF_HW }, + { ICE_NVGRE, ICE_GRE_OF_HW }, }; /** @@ -4383,6 +4472,7 @@ static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) switch (tun_type) { case ICE_SW_TUN_GENEVE: case ICE_SW_TUN_VXLAN: + case ICE_SW_TUN_NVGRE: *mask = ICE_TUN_FLAG_MASK; return true; @@ -4445,6 +4535,9 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, case ICE_SW_TUN_VXLAN: prof_type = ICE_PROF_TUN_UDP; break; + case ICE_SW_TUN_NVGRE: + prof_type = ICE_PROF_TUN_GRE; + break; default: prof_type = ICE_PROF_ALL; break; @@ -4663,6 +4756,20 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, ipv6 = true; } + if (tun_type == ICE_SW_TUN_NVGRE) { + if (tcp) { + *pkt = dummy_gre_tcp_packet; + *pkt_len = sizeof(dummy_gre_tcp_packet); + *offsets = dummy_gre_tcp_packet_offsets; + return; + } + + *pkt = dummy_gre_udp_packet; + *pkt_len = sizeof(dummy_gre_udp_packet); + *offsets = dummy_gre_udp_packet_offsets; + return; + } + if (tun_type == ICE_SW_TUN_VXLAN || tun_type == ICE_SW_TUN_GENEVE) { if (tcp) { @@ -4798,6 +4905,9 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_SCTP_IL: len = sizeof(struct ice_sctp_hdr); break; + case ICE_NVGRE: + len = sizeof(struct ice_nvgre_hdr); + break; case ICE_VXLAN: case ICE_GENEVE: len = sizeof(struct ice_udp_tnl_hdr); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 920d9024a6c1..e5d23feb6701 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ 
b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -102,6 +102,8 @@ ice_proto_type_from_tunnel(enum ice_tunnel_type type) return ICE_VXLAN; case TNL_GENEVE: return ICE_GENEVE; + case TNL_GRETAP: + return ICE_NVGRE; default: return 0; } @@ -115,6 +117,8 @@ ice_sw_type_from_tunnel(enum ice_tunnel_type type) return ICE_SW_TUN_VXLAN; case TNL_GENEVE: return ICE_SW_TUN_GENEVE; + case TNL_GRETAP: + return ICE_SW_TUN_NVGRE; default: return ICE_NON_TUN; } @@ -131,10 +135,22 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, u32 tenant_id; list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type); - tenant_id = be32_to_cpu(fltr->tenant_id) << 8; - list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id); - memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4); - i++; + switch (fltr->tunnel_type) { + case TNL_VXLAN: + case TNL_GENEVE: + tenant_id = be32_to_cpu(fltr->tenant_id) << 8; + list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id); + memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4); + i++; + break; + case TNL_GRETAP: + list[i].h_u.nvgre_hdr.tni_flow = fltr->tenant_id; + memcpy(&list[i].m_u.nvgre_hdr.tni_flow, "\xff\xff\xff\xff", 4); + i++; + break; + default: + break; + } } if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | @@ -332,6 +348,9 @@ static int ice_tc_tun_get_type(struct net_device *tunnel_dev) return TNL_VXLAN; if (netif_is_geneve(tunnel_dev)) return TNL_GENEVE; + if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return TNL_GRETAP; return TNL_LAST; } @@ -810,6 +829,7 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, struct flow_rule *rule = flow_cls_offload_flow_rule(f); u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct flow_dissector *dissector; + struct net_device *tunnel_dev; dissector = rule->match.dissector; @@ -831,17 +851,11 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, 
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))) { + tunnel_dev = ice_get_tunnel_device(filter_dev, rule); + if (tunnel_dev) { int err; - filter_dev = ice_get_tunnel_device(filter_dev, rule); - if (!filter_dev) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel device not found"); - return -EOPNOTSUPP; - } + filter_dev = tunnel_dev; err = ice_parse_tunnel_attr(filter_dev, rule, fltr); if (err) { @@ -853,6 +867,13 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, * header were already set by ice_parse_tunnel_attr */ headers = &fltr->inner_headers; + } else if (dissector->used_keys & + (BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel"); + return -EOPNOTSUPP; } else { fltr->tunnel_type = TNL_LAST; } From e492c2e12d7bb2cf3f10abd8038431e7de565058 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 13 Oct 2021 08:27:07 +0200 Subject: [PATCH 05/38] ice: send correct vc status in switchdev Part of virtchannel messages are treated in a different way in switchdev mode to block configuring VFs from iavf driver side. This blocking was done by doing nothing and returning success, even without sending a response. Not sending response for opcodes that aren't supported in switchdev mode blocks iavf driver message handling. This happens for example when vlan is configured at VF config time (VLAN module is already loaded). To get rid of it ice driver should answer for each VF message.
In switchdev mode: - for adding/deleting VLAN driver should answer success without doing anything to allow creating vlan device on VFs - for enabling/disabling VLAN stripping and promiscuous mode driver should answer not supported, this feature in switchdev can be only set from host side Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 63 ++++++++++++++----- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index a42eaf6f942e..6a74344a3c21 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -4499,13 +4499,6 @@ void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) *ops = ice_vc_vf_dflt_ops; } -static int -ice_vc_repr_no_action_msg(struct ice_vf __always_unused *vf, - u8 __always_unused *msg) -{ - return 0; -} - /** * ice_vc_repr_add_mac * @vf: pointer to VF @@ -4581,20 +4574,62 @@ ice_vc_repr_del_mac(struct ice_vf __always_unused *vf, u8 __always_unused *msg) VIRTCHNL_STATUS_SUCCESS, NULL, 0); } -static int ice_vc_repr_no_action(struct ice_vf __always_unused *vf) +static int ice_vc_repr_add_vlan(struct ice_vf *vf, u8 __always_unused *msg) { - return 0; + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't add VLAN in switchdev mode for VF %d\n", vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +} + +static int ice_vc_repr_del_vlan(struct ice_vf *vf, u8 __always_unused *msg) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't delete VLAN in switchdev mode for VF %d\n", vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +} + +static int ice_vc_repr_ena_vlan_stripping(struct ice_vf *vf) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't enable VLAN stripping in switchdev mode for VF %d\n", + vf->vf_id); + return 
ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); +} + +static int ice_vc_repr_dis_vlan_stripping(struct ice_vf *vf) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't disable VLAN stripping in switchdev mode for VF %d\n", + vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); +} + +static int +ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't config promiscuous mode in switchdev mode for VF %d\n", + vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); } void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops) { ops->add_mac_addr_msg = ice_vc_repr_add_mac; ops->del_mac_addr_msg = ice_vc_repr_del_mac; - ops->add_vlan_msg = ice_vc_repr_no_action_msg; - ops->remove_vlan_msg = ice_vc_repr_no_action_msg; - ops->ena_vlan_stripping = ice_vc_repr_no_action; - ops->dis_vlan_stripping = ice_vc_repr_no_action; - ops->cfg_promiscuous_mode_msg = ice_vc_repr_no_action_msg; + ops->add_vlan_msg = ice_vc_repr_add_vlan; + ops->remove_vlan_msg = ice_vc_repr_del_vlan; + ops->ena_vlan_stripping = ice_vc_repr_ena_vlan_stripping; + ops->dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping; + ops->cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode; } /** From e984c4408fc9a88d7eb51f241aee41f71c71f080 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Mon, 18 Oct 2021 13:30:32 +0200 Subject: [PATCH 06/38] ice: Add support for changing MTU on PR in switchdev mode This change adds support for changing MTU on port representor in switchdev mode, by setting the min/max MTU values on port representor netdev. Before it was possible to change the MTU only in a limited, default range (68-1500). 
Signed-off-by: Marcin Szycik Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_repr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index c49eeea7cb67..af8e6ef5f571 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -267,6 +267,9 @@ static int ice_repr_add(struct ice_vf *vf) if (err) goto err_devlink; + repr->netdev->min_mtu = ETH_MIN_MTU; + repr->netdev->max_mtu = ICE_MAX_MTU; + err = ice_repr_reg_netdev(repr->netdev); if (err) goto err_netdev; From 99d407524cdffa0f8938586d82e9538fa9a6618f Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 13 Oct 2021 09:02:19 -0700 Subject: [PATCH 07/38] ice: Add support to print error on PHY FW load failure Some devices have support for loading the PHY FW and in some cases this can fail. When this fails, the FW will set the corresponding bit in the link info structure. Also, the FW will send a link event if the correct link event mask bit is set. Add support for printing an error message when the PHY FW load fails during any link configuration flow and the link event flow. Since ice_check_module_power() is already doing something very similar add a new function ice_check_link_cfg_err() so any failures reported in the link info's link_cfg_err member can be printed in this one function. Also, add the new ICE_FLAG_PHY_FW_LOAD_FAILED bit to the PF's flags so we don't constantly print this error message during link polling if the value never changed. 
Signed-off-by: Brett Creeley Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 2 + drivers/net/ethernet/intel/ice/ice_main.c | 49 +++++++++++++++++-- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 763add09559c..bf4ecd9a517c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -480,6 +480,7 @@ enum ice_pf_flags { ICE_FLAG_NO_MEDIA, ICE_FLAG_FW_LLDP_AGENT, ICE_FLAG_MOD_POWER_UNSUPPORTED, + ICE_FLAG_PHY_FW_LOAD_FAILED, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index a5425f0dce3f..4eef3488d86f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1185,6 +1185,7 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7) u8 link_cfg_err; #define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED BIT(5) +#define ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE BIT(6) #define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT BIT(7) u8 link_info; #define ICE_AQ_LINK_UP BIT(0) /* Link Status */ @@ -1268,6 +1269,7 @@ struct ice_aqc_set_event_mask { #define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7) #define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) #define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) +#define ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL BIT(12) u8 reserved1[6]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2ebbbe1edd82..66112addfb9a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -936,6 +936,29 @@ static void ice_set_dflt_mib(struct ice_pf *pf) kfree(lldpmib); } +/** + * ice_check_phy_fw_load - 
check if PHY FW load failed + * @pf: pointer to PF struct + * @link_cfg_err: bitmap from the link info structure + * + * check if external PHY FW load failed and print an error message if it did + */ +static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err) +{ + if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) { + clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags); + return; + } + + if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags)) + return; + + if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) { + dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n"); + set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags); + } +} + /** * ice_check_module_power * @pf: pointer to PF struct @@ -968,6 +991,20 @@ static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err) } } +/** + * ice_check_link_cfg_err - check if link configuration failed + * @pf: pointer to the PF struct + * @link_cfg_err: bitmap from the link info structure + * + * print if any link configuration failure happens due to the value in the + * link_cfg_err parameter in the link info structure + */ +static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err) +{ + ice_check_module_power(pf, link_cfg_err); + ice_check_phy_fw_load(pf, link_cfg_err); +} + /** * ice_link_event - process the link event * @pf: PF that the link event is associated with @@ -1003,7 +1040,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, pi->lport, ice_stat_str(status), ice_aq_str(pi->hw->adminq.sq_last_status)); - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. 
@@ -1081,7 +1118,8 @@ static int ice_init_link_events(struct ice_port_info *pi) u16 mask; mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | - ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); + ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL | + ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL)); if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n", @@ -2152,7 +2190,7 @@ static void ice_check_media_subtask(struct ice_pf *pf) if (err) return; - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) @@ -4600,7 +4638,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_init_link_dflt_override(pf->hw.port_info); - ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, + pf->hw.port_info->phy.link_info.link_cfg_err); /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & @@ -8402,7 +8441,7 @@ int ice_open_internal(struct net_device *netdev) return -EIO; } - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { From 370764e60b183eee671e90e62510c2684f4ea849 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 18 Oct 2021 18:42:03 -0700 Subject: [PATCH 08/38] ice: Fix clang -Wimplicit-fallthrough in ice_pull_qvec_from_rc() Clang warns: drivers/net/ethernet/intel/ice/ice_lib.c:1906:2: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] default: ^ drivers/net/ethernet/intel/ice/ice_lib.c:1906:2: note: insert 'break;' to avoid fall-through default: ^ break; 1 error generated. 
Clang is a little more pedantic than GCC, which does not warn when falling through to a case that is just break or return. Clang's version is more in line with the kernel's own stance in deprecated.rst, which states that all switch/case blocks must end in either break, fallthrough, continue, goto, or return. Add the missing break to silence the warning. Link: https://github.com/ClangBuiltLinux/linux/issues/1482 Signed-off-by: Nathan Chancellor Reviewed-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 77dceab9fbbe..159c52b9b9d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1983,6 +1983,7 @@ static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc) case ICE_TX_CONTAINER: if (rc->tx_ring) return rc->tx_ring->q_vector; + break; default: break; } From c8e51a012214a09017d4065c478f8a908f8f060b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 26 Oct 2021 20:23:33 +0800 Subject: [PATCH 09/38] ice: fix error return code in ice_get_recp_frm_fw() Return error code if devm_kmemdup() fails in ice_get_recp_frm_fw() Fixes: fd2a6b71e300 ("ice: create advanced switch recipe") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2af03b9845eb..793f4a9fc2cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1363,8 +1363,10 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp, recps[rid].n_grp_count * 
sizeof(*recps[rid].root_buf), GFP_KERNEL); - if (!recps[rid].root_buf) + if (!recps[rid].root_buf) { + status = ICE_ERR_NO_MEMORY; goto err_unroll; + } /* Copy result indexes */ bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS); From 48e4d00b1b93cc9ce9174cc8c99d2bcdfb6ecc0f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 27 Oct 2021 18:19:59 +0300 Subject: [PATCH 10/38] mlxsw: spectrum_qdisc: Offload root TBF as port shaper The Spectrum ASIC allows configuration of maximum shaper on all levels of the scheduling hierarchy: TCs, subgroups, groups and also ports. Currently, TBF always configures a subgroup. But a user could reasonably express the intent to configure port shaper by putting TBF to a root position, around ETS / PRIO. Accept this usage and offload appropriately. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 55 +++++++++++++------ 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index ddb5ad88b350..4243d3b883ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -271,6 +271,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc_tree_validate { bool forbid_ets; + bool forbid_root_tbf; bool forbid_tbf; bool forbid_red; }; @@ -310,18 +311,26 @@ __mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, if (validate.forbid_red) return -EINVAL; validate.forbid_red = true; + validate.forbid_root_tbf = true; validate.forbid_ets = true; break; case MLXSW_SP_QDISC_TBF: - if (validate.forbid_tbf) - return -EINVAL; - validate.forbid_tbf = true; - validate.forbid_ets = true; + if (validate.forbid_root_tbf) { + if (validate.forbid_tbf) + return -EINVAL; + /* This is a TC TBF. 
*/ + validate.forbid_tbf = true; + validate.forbid_ets = true; + } else { + /* This is root TBF. */ + validate.forbid_root_tbf = true; + } break; case MLXSW_SP_QDISC_PRIO: case MLXSW_SP_QDISC_ETS: if (validate.forbid_ets) return -EINVAL; + validate.forbid_root_tbf = true; validate.forbid_ets = true; break; default: @@ -905,16 +914,34 @@ mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->stats_base.backlog = 0; } +static enum mlxsw_reg_qeec_hr +mlxsw_sp_qdisc_tbf_hr(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + if (mlxsw_sp_qdisc == &mlxsw_sp_port->qdisc->root_qdisc) + return MLXSW_REG_QEEC_HR_PORT; + + /* Configure subgroup shaper, so that both UC and MC traffic is subject + * to shaping. That is unlike RED, however UC queue lengths are going to + * be different than MC ones due to different pool and quota + * configurations, so the configuration is not applicable. For shaper on + * the other hand, subjecting the overall stream to the configured + * shaper makes sense. Also note that that is what we do for + * ieee_setmaxrate(). 
+ */ + return MLXSW_REG_QEEC_HR_SUBGROUP; +} + static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port, + mlxsw_sp_qdisc); int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, mlxsw_sp_qdisc); - return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HR_SUBGROUP, - tclass_num, 0, + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0, MLXSW_REG_QEEC_MAS_DIS, 0); } @@ -996,6 +1023,8 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { + enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port, + mlxsw_sp_qdisc); struct tc_tbf_qopt_offload_replace_params *p = params; u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); int tclass_num; @@ -1016,17 +1045,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, /* check_params above was supposed to reject this value. */ return -EINVAL; - /* Configure subgroup shaper, so that both UC and MC traffic is subject - * to shaping. That is unlike RED, however UC queue lengths are going to - * be different than MC ones due to different pool and quota - * configurations, so the configuration is not applicable. For shaper on - * the other hand, subjecting the overall stream to the configured - * shaper makes sense. Also note that that is what we do for - * ieee_setmaxrate(). - */ - return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HR_SUBGROUP, - tclass_num, 0, + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0, rate_kbps, burst_size); } From 3d5290ea1daeee5da2e46abda730351c2e5b1faa Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 27 Oct 2021 18:20:00 +0300 Subject: [PATCH 11/38] selftests: mlxsw: Test offloadability of root TBF TBF can be used as a root qdisc, with the usual ETS/RED/TBF hierarchy below it. 
This use should now be offloaded. Add a test that verifies that it is. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/sch_offload.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh index ade79ef08de3..071a33d10c20 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh @@ -6,7 +6,9 @@ ALL_TESTS=" test_root + test_port_tbf test_etsprio + test_etsprio_port_tbf " NUM_NETIFS=1 lib_dir=$(dirname $0)/../../../net/forwarding @@ -221,6 +223,12 @@ test_root() do_test_combinations 1 0 } +test_port_tbf() +{ + with_tbf 1: root \ + do_test_combinations 8 1 +} + do_test_etsprio() { local parent=$1; shift @@ -264,6 +272,12 @@ test_etsprio() do_test_etsprio root "" } +test_etsprio_port_tbf() +{ + with_tbf 1: root \ + do_test_etsprio "parent 1:1" "-TBF" +} + cleanup() { tc qdisc del dev $h1 root &>/dev/null From 2b11e24ebaef77e7151ddcc1762429798b9f75d5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 27 Oct 2021 18:20:01 +0300 Subject: [PATCH 12/38] selftests: mlxsw: Test port shaper TBF can be used as a root qdisc, in which case it is supposed to configure port shaper. Add a test that verifies that this is so by installing a root TBF with an ETS or PRIO below it, and then expecting individual bands to all be shaped according to the root TBF configuration. 
Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../net/forwarding/sch_tbf_etsprio.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index 8bd85da1905a..75a37c189ef3 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -4,9 +4,12 @@ ALL_TESTS=" ping_ipv4 tbf_test + tbf_root_test " source $lib_dir/sch_tbf_core.sh +QDISC_TYPE=${QDISC% *} + tbf_test_one() { local bs=$1; shift @@ -22,6 +25,8 @@ tbf_test_one() tbf_test() { + log_info "Testing root-$QDISC_TYPE-tbf" + # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 @@ -29,6 +34,29 @@ tbf_test() tc qdisc del dev $swp2 root } +tbf_root_test() +{ + local bs=128K + + log_info "Testing root-tbf-$QDISC_TYPE" + + tc qdisc replace dev $swp2 root handle 1: \ + tbf rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ + $QDISC 3 priomap 2 1 0 + tc qdisc replace dev $swp2 parent 10:3 handle 103: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:1 handle 101: \ + bfifo limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 400 $bs + + tc qdisc del dev $swp2 root +} + trap cleanup EXIT setup_prepare From c5f6e5ebc2af65fc7d2e7c3a18446443afeca914 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Oct 2021 19:21:15 +0300 Subject: [PATCH 13/38] net: bridge: provide shim definition for br_vlan_flags br_vlan_replay() needs this, and we're preparing to move it to br_switchdev.c, which will be compiled regardless of whether or not CONFIG_BRIDGE_VLAN_FILTERING is enabled. 
Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3c9327628060..cc31c3fe1e02 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1708,6 +1708,11 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, return true; } +static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) +{ + return 0; +} + static inline int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, const void *ctx, bool adding, struct notifier_block *nb, From 4a6849e4617309b7b5934f9ea761c02915b5332a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Oct 2021 19:21:16 +0300 Subject: [PATCH 14/38] net: bridge: move br_vlan_replay to br_switchdev.c br_vlan_replay() is relevant only if CONFIG_NET_SWITCHDEV is enabled, so move it to br_switchdev.c. Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 10 ----- net/bridge/br_switchdev.c | 85 +++++++++++++++++++++++++++++++++++++++ net/bridge/br_vlan.c | 84 -------------------------------------- 3 files changed, 85 insertions(+), 94 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cc31c3fe1e02..b16c83e10356 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1459,9 +1459,6 @@ void br_vlan_notify(const struct net_bridge *br, const struct net_bridge_port *p, u16 vid, u16 vid_range, int cmd); -int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end); @@ -1713,13 +1710,6 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) return 0; } -static 
inline int br_vlan_replay(struct net_device *br_dev, - struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} #endif /* br_vlan_options.c */ diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 2fbe881cdfe2..d773d819a867 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -327,6 +327,91 @@ static int br_fdb_replay(const struct net_device *br_dev, const void *ctx, return err; } +static int br_vlan_replay_one(struct notifier_block *nb, + struct net_device *dev, + struct switchdev_obj_port_vlan *vlan, + const void *ctx, unsigned long action, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_port_obj_info obj_info = { + .info = { + .dev = dev, + .extack = extack, + .ctx = ctx, + }, + .obj = &vlan->obj, + }; + int err; + + err = nb->notifier_call(nb, action, &obj_info); + return notifier_to_errno(err); +} + +static int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + struct net_bridge *br; + unsigned long action; + int err = 0; + u16 pvid; + + ASSERT_RTNL(); + + if (!nb) + return 0; + + if (!netif_is_bridge_master(br_dev)) + return -EINVAL; + + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) + return -EINVAL; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + p = NULL; + } else { + p = br_port_get_rtnl(dev); + if (WARN_ON(!p)) + return -EINVAL; + vg = nbp_vlan_group(p); + br = p->br; + } + + if (!vg) + return 0; + + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + pvid = br_get_pvid(vg); + + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, + .obj.id = 
SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = br_vlan_flags(v, pvid), + .vid = v->vid, + }; + + if (!br_vlan_should_use(v)) + continue; + + err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); + if (err) + return err; + } + + return err; +} + static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 57bd6ee72a07..49e105e0a447 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1860,90 +1860,6 @@ out_kfree: kfree_skb(skb); } -static int br_vlan_replay_one(struct notifier_block *nb, - struct net_device *dev, - struct switchdev_obj_port_vlan *vlan, - const void *ctx, unsigned long action, - struct netlink_ext_ack *extack) -{ - struct switchdev_notifier_port_obj_info obj_info = { - .info = { - .dev = dev, - .extack = extack, - .ctx = ctx, - }, - .obj = &vlan->obj, - }; - int err; - - err = nb->notifier_call(nb, action, &obj_info); - return notifier_to_errno(err); -} - -int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) -{ - struct net_bridge_vlan_group *vg; - struct net_bridge_vlan *v; - struct net_bridge_port *p; - struct net_bridge *br; - unsigned long action; - int err = 0; - u16 pvid; - - ASSERT_RTNL(); - - if (!nb) - return 0; - - if (!netif_is_bridge_master(br_dev)) - return -EINVAL; - - if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) - return -EINVAL; - - if (netif_is_bridge_master(dev)) { - br = netdev_priv(dev); - vg = br_vlan_group(br); - p = NULL; - } else { - p = br_port_get_rtnl(dev); - if (WARN_ON(!p)) - return -EINVAL; - vg = nbp_vlan_group(p); - br = p->br; - } - - if (!vg) - return 0; - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; - - pvid = br_get_pvid(vg); - - list_for_each_entry(v, &vg->vlan_list, vlist) { - 
struct switchdev_obj_port_vlan vlan = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .flags = br_vlan_flags(v, pvid), - .vid = v->vid, - }; - - if (!br_vlan_should_use(v)) - continue; - - err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); - if (err) - return err; - } - - return err; -} - /* check if v_curr can enter a range ending in range_end */ bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end) From 9ae9ff994b0e42eefcc33f8adda1ec498f79338e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Oct 2021 19:21:17 +0300 Subject: [PATCH 15/38] net: bridge: split out the switchdev portion of br_mdb_notify Similar to fdb_notify() and br_switchdev_fdb_notify(), split the switchdev specific logic from br_mdb_notify() into a different function. This will be moved later in br_switchdev.c. Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_mdb.c | 62 +++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 61ccf46fcc21..9513f0791c3d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -759,10 +759,10 @@ static void br_mdb_switchdev_host(struct net_device *dev, br_mdb_switchdev_host_port(dev, lower_dev, mp, type); } -void br_mdb_notify(struct net_device *dev, - struct net_bridge_mdb_entry *mp, - struct net_bridge_port_group *pg, - int type) +static void br_switchdev_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) { struct br_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { @@ -771,33 +771,41 @@ void br_mdb_notify(struct net_device *dev, .flags = SWITCHDEV_F_DEFER, }, }; + + if (!pg) + return br_mdb_switchdev_host(dev, mp, type); + + br_switchdev_mdb_populate(&mdb, mp); + + mdb.obj.orig_dev = pg->key.port->dev; + switch (type) 
{ + case RTM_NEWMDB: + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (!complete_info) + break; + complete_info->port = pg->key.port; + complete_info->ip = mp->addr; + mdb.obj.complete_priv = complete_info; + mdb.obj.complete = br_mdb_complete; + if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) + kfree(complete_info); + break; + case RTM_DELMDB: + switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); + break; + } +} + +void br_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) +{ struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - if (pg) { - br_switchdev_mdb_populate(&mdb, mp); - - mdb.obj.orig_dev = pg->key.port->dev; - switch (type) { - case RTM_NEWMDB: - complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); - if (!complete_info) - break; - complete_info->port = pg->key.port; - complete_info->ip = mp->addr; - mdb.obj.complete_priv = complete_info; - mdb.obj.complete = br_mdb_complete; - if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) - kfree(complete_info); - break; - case RTM_DELMDB: - switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); - break; - } - } else { - br_mdb_switchdev_host(dev, mp, type); - } + br_switchdev_mdb_notify(dev, mp, pg, type); skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC); if (!skb) From 9776457c784f6549d43f80eb96d4122b51558258 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Oct 2021 19:21:18 +0300 Subject: [PATCH 16/38] net: bridge: mdb: move all switchdev logic to br_switchdev.c The following functions: br_mdb_complete br_switchdev_mdb_populate br_mdb_replay_one br_mdb_queue_one br_mdb_replay br_mdb_switchdev_host_port br_mdb_switchdev_host br_switchdev_mdb_notify are only accessible from code paths where CONFIG_NET_SWITCHDEV is enabled. So move them to br_switchdev.c, in order for that code to be compiled out if that config option is disabled. 
Note that br_switchdev.c gets built regardless of whether CONFIG_BRIDGE_IGMP_SNOOPING is enabled or not, whereas br_mdb.c only got built when CONFIG_BRIDGE_IGMP_SNOOPING was enabled. So to preserve correct compilation with CONFIG_BRIDGE_IGMP_SNOOPING being disabled, we must now place an #ifdef around these functions in br_switchdev.c. The offending bridge data structures that need this are br->multicast_lock and br->mdb_list; these are also compiled out of struct net_bridge when CONFIG_BRIDGE_IGMP_SNOOPING is turned off. Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_mdb.c | 244 ------------------------------------ net/bridge/br_private.h | 17 +-- net/bridge/br_switchdev.c | 253 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+), 252 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 9513f0791c3d..4556d913955b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -552,250 +552,6 @@ out: return nlmsg_size; } -struct br_mdb_complete_info { - struct net_bridge_port *port; - struct br_ip ip; -}; - -static void br_mdb_complete(struct net_device *dev, int err, void *priv) -{ - struct br_mdb_complete_info *data = priv; - struct net_bridge_port_group __rcu **pp; - struct net_bridge_port_group *p; - struct net_bridge_mdb_entry *mp; - struct net_bridge_port *port = data->port; - struct net_bridge *br = port->br; - - if (err) - goto err; - - spin_lock_bh(&br->multicast_lock); - mp = br_mdb_ip_get(br, &data->ip); - if (!mp) - goto out; - for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { - if (p->key.port != port) - continue; - p->flags |= MDB_PG_FLAGS_OFFLOAD; - } -out: - spin_unlock_bh(&br->multicast_lock); -err: - kfree(priv); -} - -static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb, - const struct net_bridge_mdb_entry *mp) -{ - if (mp->addr.proto == htons(ETH_P_IP)) - ip_eth_mc_map(mp->addr.dst.ip4, 
mdb->addr); -#if IS_ENABLED(CONFIG_IPV6) - else if (mp->addr.proto == htons(ETH_P_IPV6)) - ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr); -#endif - else - ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr); - - mdb->vid = mp->addr.vid; -} - -static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb, - unsigned long action, const void *ctx, - struct netlink_ext_ack *extack) -{ - struct switchdev_notifier_port_obj_info obj_info = { - .info = { - .dev = dev, - .extack = extack, - .ctx = ctx, - }, - .obj = &mdb->obj, - }; - int err; - - err = nb->notifier_call(nb, action, &obj_info); - return notifier_to_errno(err); -} - -static int br_mdb_queue_one(struct list_head *mdb_list, - enum switchdev_obj_id id, - const struct net_bridge_mdb_entry *mp, - struct net_device *orig_dev) -{ - struct switchdev_obj_port_mdb *mdb; - - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) - return -ENOMEM; - - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); - - return 0; -} - -int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) -{ - const struct net_bridge_mdb_entry *mp; - struct switchdev_obj *obj, *tmp; - struct net_bridge *br; - unsigned long action; - LIST_HEAD(mdb_list); - int err = 0; - - ASSERT_RTNL(); - - if (!nb) - return 0; - - if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) - return -EINVAL; - - br = netdev_priv(br_dev); - - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) - return 0; - - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. 
But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. - */ - rcu_read_lock(); - - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { - struct net_bridge_port_group __rcu * const *pp; - const struct net_bridge_port_group *p; - - if (mp->host_joined) { - err = br_mdb_queue_one(&mdb_list, - SWITCHDEV_OBJ_ID_HOST_MDB, - mp, br_dev); - if (err) { - rcu_read_unlock(); - goto out_free_mdb; - } - } - - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; - pp = &p->next) { - if (p->key.port->dev != dev) - continue; - - err = br_mdb_queue_one(&mdb_list, - SWITCHDEV_OBJ_ID_PORT_MDB, - mp, dev); - if (err) { - rcu_read_unlock(); - goto out_free_mdb; - } - } - } - - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; - - list_for_each_entry(obj, &mdb_list, list) { - err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj), - action, ctx, extack); - if (err) - goto out_free_mdb; - } - -out_free_mdb: - list_for_each_entry_safe(obj, tmp, &mdb_list, list) { - list_del(&obj->list); - kfree(SWITCHDEV_OBJ_PORT_MDB(obj)); - } - - return err; -} - -static void br_mdb_switchdev_host_port(struct net_device *dev, - struct net_device *lower_dev, - struct net_bridge_mdb_entry *mp, - int type) -{ - struct switchdev_obj_port_mdb mdb = { - .obj = { - .id = SWITCHDEV_OBJ_ID_HOST_MDB, - .flags = SWITCHDEV_F_DEFER, - .orig_dev = dev, - }, - }; - - br_switchdev_mdb_populate(&mdb, mp); - - switch (type) { - case RTM_NEWMDB: - switchdev_port_obj_add(lower_dev, &mdb.obj, NULL); - break; - case RTM_DELMDB: - switchdev_port_obj_del(lower_dev, &mdb.obj); - break; - } -} - -static void br_mdb_switchdev_host(struct net_device *dev, - struct 
net_bridge_mdb_entry *mp, int type) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(dev, lower_dev, iter) - br_mdb_switchdev_host_port(dev, lower_dev, mp, type); -} - -static void br_switchdev_mdb_notify(struct net_device *dev, - struct net_bridge_mdb_entry *mp, - struct net_bridge_port_group *pg, - int type) -{ - struct br_mdb_complete_info *complete_info; - struct switchdev_obj_port_mdb mdb = { - .obj = { - .id = SWITCHDEV_OBJ_ID_PORT_MDB, - .flags = SWITCHDEV_F_DEFER, - }, - }; - - if (!pg) - return br_mdb_switchdev_host(dev, mp, type); - - br_switchdev_mdb_populate(&mdb, mp); - - mdb.obj.orig_dev = pg->key.port->dev; - switch (type) { - case RTM_NEWMDB: - complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); - if (!complete_info) - break; - complete_info->port = pg->key.port; - complete_info->ip = mp->addr; - mdb.obj.complete_priv = complete_info; - mdb.obj.complete = br_mdb_complete; - if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) - kfree(complete_info); - break; - case RTM_DELMDB: - switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); - break; - } -} - void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b16c83e10356..5552c00ed9c4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -956,9 +956,11 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); -int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); +void br_switchdev_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type); + int br_rports_fill_info(struct sk_buff *skb, const struct net_bridge_mcast 
*brmctx); int br_multicast_dump_querier_state(struct sk_buff *skb, @@ -1394,12 +1396,11 @@ static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, return false; } -static inline int br_mdb_replay(struct net_device *br_dev, - struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) +static inline void br_switchdev_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) { - return -EOPNOTSUPP; } static inline bool diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index d773d819a867..b7645165143c 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -412,6 +413,258 @@ static int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, return err; } +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +struct br_mdb_complete_info { + struct net_bridge_port *port; + struct br_ip ip; +}; + +static void br_mdb_complete(struct net_device *dev, int err, void *priv) +{ + struct br_mdb_complete_info *data = priv; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port *port = data->port; + struct net_bridge *br = port->br; + + if (err) + goto err; + + spin_lock_bh(&br->multicast_lock); + mp = br_mdb_ip_get(br, &data->ip); + if (!mp) + goto out; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->key.port != port) + continue; + p->flags |= MDB_PG_FLAGS_OFFLOAD; + } +out: + spin_unlock_bh(&br->multicast_lock); +err: + kfree(priv); +} + +static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb, + const struct net_bridge_mdb_entry *mp) +{ + if (mp->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr); +#if IS_ENABLED(CONFIG_IPV6) + else if (mp->addr.proto == 
htons(ETH_P_IPV6)) + ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr); +#endif + else + ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr); + + mdb->vid = mp->addr.vid; +} + +static void br_mdb_switchdev_host_port(struct net_device *dev, + struct net_device *lower_dev, + struct net_bridge_mdb_entry *mp, + int type) +{ + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_HOST_MDB, + .flags = SWITCHDEV_F_DEFER, + .orig_dev = dev, + }, + }; + + br_switchdev_mdb_populate(&mdb, mp); + + switch (type) { + case RTM_NEWMDB: + switchdev_port_obj_add(lower_dev, &mdb.obj, NULL); + break; + case RTM_DELMDB: + switchdev_port_obj_del(lower_dev, &mdb.obj); + break; + } +} + +static void br_mdb_switchdev_host(struct net_device *dev, + struct net_bridge_mdb_entry *mp, int type) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) + br_mdb_switchdev_host_port(dev, lower_dev, mp, type); +} + +static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + unsigned long action, const void *ctx, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_port_obj_info obj_info = { + .info = { + .dev = dev, + .extack = extack, + .ctx = ctx, + }, + .obj = &mdb->obj, + }; + int err; + + err = nb->notifier_call(nb, action, &obj_info); + return notifier_to_errno(err); +} + +static int br_mdb_queue_one(struct list_head *mdb_list, + enum switchdev_obj_id id, + const struct net_bridge_mdb_entry *mp, + struct net_device *orig_dev) +{ + struct switchdev_obj_port_mdb *mdb; + + mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); + if (!mdb) + return -ENOMEM; + + mdb->obj.id = id; + mdb->obj.orig_dev = orig_dev; + br_switchdev_mdb_populate(mdb, mp); + list_add_tail(&mdb->obj.list, mdb_list); + + return 0; +} + +void br_switchdev_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) +{ + struct 
br_mdb_complete_info *complete_info; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + }; + + if (!pg) + return br_mdb_switchdev_host(dev, mp, type); + + br_switchdev_mdb_populate(&mdb, mp); + + mdb.obj.orig_dev = pg->key.port->dev; + switch (type) { + case RTM_NEWMDB: + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (!complete_info) + break; + complete_info->port = pg->key.port; + complete_info->ip = mp->addr; + mdb.obj.complete_priv = complete_info; + mdb.obj.complete = br_mdb_complete; + if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) + kfree(complete_info); + break; + case RTM_DELMDB: + switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); + break; + } +} +#endif + +static int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + const struct net_bridge_mdb_entry *mp; + struct switchdev_obj *obj, *tmp; + struct net_bridge *br; + unsigned long action; + LIST_HEAD(mdb_list); + int err = 0; + + ASSERT_RTNL(); + + if (!nb) + return 0; + + if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) + return -EINVAL; + + br = netdev_priv(br_dev); + + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return 0; + + /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, + * because the write-side protection is br->multicast_lock. But we + * need to emulate the [ blocking ] calling context of a regular + * switchdev event, so since both br->multicast_lock and RCU read side + * critical sections are atomic, we have no choice but to pick the RCU + * read side lock, queue up all our events, leave the critical section + * and notify switchdev from blocking context. 
+ */ + rcu_read_lock(); + + hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + struct net_bridge_port_group __rcu * const *pp; + const struct net_bridge_port_group *p; + + if (mp->host_joined) { + err = br_mdb_queue_one(&mdb_list, + SWITCHDEV_OBJ_ID_HOST_MDB, + mp, br_dev); + if (err) { + rcu_read_unlock(); + goto out_free_mdb; + } + } + + for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + if (p->key.port->dev != dev) + continue; + + err = br_mdb_queue_one(&mdb_list, + SWITCHDEV_OBJ_ID_PORT_MDB, + mp, dev); + if (err) { + rcu_read_unlock(); + goto out_free_mdb; + } + } + } + + rcu_read_unlock(); + + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + list_for_each_entry(obj, &mdb_list, list) { + err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj), + action, ctx, extack); + if (err) + goto out_free_mdb; + } + +out_free_mdb: + list_for_each_entry_safe(obj, tmp, &mdb_list, list) { + list_del(&obj->list); + kfree(SWITCHDEV_OBJ_PORT_MDB(obj)); + } + + if (err) + return err; +#endif + + return 0; +} + static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, From 326b212e9cd67498841f3654a96d91718dd11f39 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Oct 2021 19:21:19 +0300 Subject: [PATCH 17/38] net: bridge: switchdev: consistent function naming Rename all recently imported functions in br_switchdev.c to start with a br_switchdev_* prefix. 
br_fdb_replay_one() -> br_switchdev_fdb_replay_one() br_fdb_replay() -> br_switchdev_fdb_replay() br_vlan_replay_one() -> br_switchdev_vlan_replay_one() br_vlan_replay() -> br_switchdev_vlan_replay() struct br_mdb_complete_info -> struct br_switchdev_mdb_complete_info br_mdb_complete() -> br_switchdev_mdb_complete() br_mdb_switchdev_host_port() -> br_switchdev_host_mdb_one() br_mdb_switchdev_host() -> br_switchdev_host_mdb() br_mdb_replay_one() -> br_switchdev_mdb_replay_one() br_mdb_replay() -> br_switchdev_mdb_replay() br_mdb_queue_one() -> br_switchdev_mdb_queue_one() Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_switchdev.c | 117 ++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index b7645165143c..f8fbaaa7c501 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -281,9 +281,10 @@ static void nbp_switchdev_del(struct net_bridge_port *p) } } -static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, - const struct net_bridge_fdb_entry *fdb, - unsigned long action, const void *ctx) +static int +br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, + const struct net_bridge_fdb_entry *fdb, + unsigned long action, const void *ctx) { struct switchdev_notifier_fdb_info item; int err; @@ -294,8 +295,9 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, return notifier_to_errno(err); } -static int br_fdb_replay(const struct net_device *br_dev, const void *ctx, - bool adding, struct notifier_block *nb) +static int +br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx, + bool adding, struct notifier_block *nb) { struct net_bridge_fdb_entry *fdb; struct net_bridge *br; @@ -318,7 +320,7 @@ static int br_fdb_replay(const struct net_device *br_dev, const void *ctx, rcu_read_lock(); 
hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) { - err = br_fdb_replay_one(br, nb, fdb, action, ctx); + err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx); if (err) break; } @@ -328,11 +330,12 @@ static int br_fdb_replay(const struct net_device *br_dev, const void *ctx, return err; } -static int br_vlan_replay_one(struct notifier_block *nb, - struct net_device *dev, - struct switchdev_obj_port_vlan *vlan, - const void *ctx, unsigned long action, - struct netlink_ext_ack *extack) +static int +br_switchdev_vlan_replay_one(struct notifier_block *nb, + struct net_device *dev, + struct switchdev_obj_port_vlan *vlan, + const void *ctx, unsigned long action, + struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { @@ -348,10 +351,11 @@ static int br_vlan_replay_one(struct notifier_block *nb, return notifier_to_errno(err); } -static int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, - struct notifier_block *nb, - struct netlink_ext_ack *extack) +static int br_switchdev_vlan_replay(struct net_device *br_dev, + struct net_device *dev, + const void *ctx, bool adding, + struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; @@ -405,7 +409,8 @@ static int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, if (!br_vlan_should_use(v)) continue; - err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); + err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx, + action, extack); if (err) return err; } @@ -414,14 +419,14 @@ static int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -struct br_mdb_complete_info { +struct br_switchdev_mdb_complete_info { struct net_bridge_port *port; struct br_ip ip; }; -static void br_mdb_complete(struct net_device *dev, int err, void *priv) +static void br_switchdev_mdb_complete(struct net_device 
*dev, int err, void *priv) { - struct br_mdb_complete_info *data = priv; + struct br_switchdev_mdb_complete_info *data = priv; struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; struct net_bridge_mdb_entry *mp; @@ -462,10 +467,10 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb, mdb->vid = mp->addr.vid; } -static void br_mdb_switchdev_host_port(struct net_device *dev, - struct net_device *lower_dev, - struct net_bridge_mdb_entry *mp, - int type) +static void br_switchdev_host_mdb_one(struct net_device *dev, + struct net_device *lower_dev, + struct net_bridge_mdb_entry *mp, + int type) { struct switchdev_obj_port_mdb mdb = { .obj = { @@ -487,20 +492,21 @@ static void br_mdb_switchdev_host_port(struct net_device *dev, } } -static void br_mdb_switchdev_host(struct net_device *dev, +static void br_switchdev_host_mdb(struct net_device *dev, struct net_bridge_mdb_entry *mp, int type) { struct net_device *lower_dev; struct list_head *iter; netdev_for_each_lower_dev(dev, lower_dev, iter) - br_mdb_switchdev_host_port(dev, lower_dev, mp, type); + br_switchdev_host_mdb_one(dev, lower_dev, mp, type); } -static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb, - unsigned long action, const void *ctx, - struct netlink_ext_ack *extack) +static int +br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + unsigned long action, const void *ctx, + struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { @@ -516,10 +522,10 @@ static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, return notifier_to_errno(err); } -static int br_mdb_queue_one(struct list_head *mdb_list, - enum switchdev_obj_id id, - const struct net_bridge_mdb_entry *mp, - struct net_device *orig_dev) +static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + enum 
switchdev_obj_id id, + const struct net_bridge_mdb_entry *mp, + struct net_device *orig_dev) { struct switchdev_obj_port_mdb *mdb; @@ -540,7 +546,7 @@ void br_switchdev_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, int type) { - struct br_mdb_complete_info *complete_info; + struct br_switchdev_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_PORT_MDB, @@ -549,7 +555,7 @@ void br_switchdev_mdb_notify(struct net_device *dev, }; if (!pg) - return br_mdb_switchdev_host(dev, mp, type); + return br_switchdev_host_mdb(dev, mp, type); br_switchdev_mdb_populate(&mdb, mp); @@ -562,7 +568,7 @@ void br_switchdev_mdb_notify(struct net_device *dev, complete_info->port = pg->key.port; complete_info->ip = mp->addr; mdb.obj.complete_priv = complete_info; - mdb.obj.complete = br_mdb_complete; + mdb.obj.complete = br_switchdev_mdb_complete; if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) kfree(complete_info); break; @@ -573,10 +579,10 @@ void br_switchdev_mdb_notify(struct net_device *dev, } #endif -static int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, - struct notifier_block *nb, - struct netlink_ext_ack *extack) +static int +br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING const struct net_bridge_mdb_entry *mp; @@ -614,9 +620,9 @@ static int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_mdb_queue_one(&mdb_list, - SWITCHDEV_OBJ_ID_HOST_MDB, - mp, br_dev); + err = br_switchdev_mdb_queue_one(&mdb_list, + SWITCHDEV_OBJ_ID_HOST_MDB, + mp, br_dev); if (err) { rcu_read_unlock(); goto out_free_mdb; @@ -628,9 +634,9 @@ static int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, if 
(p->key.port->dev != dev) continue; - err = br_mdb_queue_one(&mdb_list, - SWITCHDEV_OBJ_ID_PORT_MDB, - mp, dev); + err = br_switchdev_mdb_queue_one(&mdb_list, + SWITCHDEV_OBJ_ID_PORT_MDB, + mp, dev); if (err) { rcu_read_unlock(); goto out_free_mdb; @@ -646,8 +652,9 @@ static int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, action = SWITCHDEV_PORT_OBJ_DEL; list_for_each_entry(obj, &mdb_list, list) { - err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj), - action, ctx, extack); + err = br_switchdev_mdb_replay_one(nb, dev, + SWITCHDEV_OBJ_PORT_MDB(obj), + action, ctx, extack); if (err) goto out_free_mdb; } @@ -674,15 +681,17 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, struct net_device *dev = p->dev; int err; - err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack); + err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb, + extack); if (err && err != -EOPNOTSUPP) return err; - err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack); + err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb, + extack); if (err && err != -EOPNOTSUPP) return err; - err = br_fdb_replay(br_dev, ctx, true, atomic_nb); + err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb); if (err && err != -EOPNOTSUPP) return err; @@ -697,11 +706,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, struct net_device *br_dev = p->br->dev; struct net_device *dev = p->dev; - br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); - br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); - br_fdb_replay(br_dev, ctx, false, atomic_nb); + br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb); } /* Let the bridge know that this port is offloaded, so that it can assign a From c7dd4a5b0a155c4db0ff9758668235651c2ebf22 Mon Sep 17 
00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:38 -0400 Subject: [PATCH 18/38] bnxt_en: refactor printing of device info The device info logged during probe will be reused by the devlink driver_reinit code in a following patch. Extract this logic into the new bnxt_print_device_info() function. The board index needs to be saved in the driver context so that the board information can be retrieved at a later time, outside of the probe function. Reviewed-by: Somnath Kotur Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 67 +++++------------------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 51 ++++++++++++++++- 2 files changed, 63 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 66263aa0d96b..8ff398525488 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -85,55 +85,7 @@ MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); #define BNXT_TX_PUSH_THRESH 164 -enum board_idx { - BCM57301, - BCM57302, - BCM57304, - BCM57417_NPAR, - BCM58700, - BCM57311, - BCM57312, - BCM57402, - BCM57404, - BCM57406, - BCM57402_NPAR, - BCM57407, - BCM57412, - BCM57414, - BCM57416, - BCM57417, - BCM57412_NPAR, - BCM57314, - BCM57417_SFP, - BCM57416_SFP, - BCM57404_NPAR, - BCM57406_NPAR, - BCM57407_SFP, - BCM57407_NPAR, - BCM57414_NPAR, - BCM57416_NPAR, - BCM57452, - BCM57454, - BCM5745x_NPAR, - BCM57508, - BCM57504, - BCM57502, - BCM57508_NPAR, - BCM57504_NPAR, - BCM57502_NPAR, - BCM58802, - BCM58804, - BCM58808, - NETXTREME_E_VF, - NETXTREME_C_VF, - NETXTREME_S_VF, - NETXTREME_C_VF_HV, - NETXTREME_E_VF_HV, - NETXTREME_E_P5_VF, - NETXTREME_E_P5_VF_HV, -}; - -/* indexed by enum above */ +/* indexed by enum board_idx */ static const struct { char *name; } board_info[] = { @@ -13186,6 +13138,15 @@ static int bnxt_map_db_bar(struct bnxt *bp) return 0; } +void 
bnxt_print_device_info(struct bnxt *bp) +{ + netdev_info(bp->dev, "%s found at mem %lx, node addr %pM\n", + board_info[bp->board_idx].name, + (long)pci_resource_start(bp->pdev, 0), bp->dev->dev_addr); + + pcie_print_link_status(bp->pdev); +} + static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev; @@ -13209,10 +13170,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; bp = netdev_priv(dev); + bp->board_idx = ent->driver_data; bp->msg_enable = BNXT_DEF_MSG_ENABLE; bnxt_set_max_func_irqs(bp, max_irqs); - if (bnxt_vf_pciid(ent->driver_data)) + if (bnxt_vf_pciid(bp->board_idx)) bp->flags |= BNXT_FLAG_VF; if (pdev->msix_cap) @@ -13382,10 +13344,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) devlink_port_type_eth_set(&bp->dl_port, bp->dev); bnxt_dl_fw_reporters_create(bp); - netdev_info(dev, "%s found at mem %lx, node addr %pM\n", - board_info[ent->driver_data].name, - (long)pci_resource_start(pdev, 0), dev->dev_addr); - pcie_print_link_status(pdev); + bnxt_print_device_info(bp); pci_save_state(pdev); return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 19fe6478e9b4..55da89cb62b5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1586,6 +1586,54 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_RETRY 5 #define BNXT_FW_IF_RETRY 10 +enum board_idx { + BCM57301, + BCM57302, + BCM57304, + BCM57417_NPAR, + BCM58700, + BCM57311, + BCM57312, + BCM57402, + BCM57404, + BCM57406, + BCM57402_NPAR, + BCM57407, + BCM57412, + BCM57414, + BCM57416, + BCM57417, + BCM57412_NPAR, + BCM57314, + BCM57417_SFP, + BCM57416_SFP, + BCM57404_NPAR, + BCM57406_NPAR, + BCM57407_SFP, + BCM57407_NPAR, + BCM57414_NPAR, + BCM57416_NPAR, + BCM57452, + BCM57454, + BCM5745x_NPAR, + BCM57508, + BCM57504, + BCM57502, + BCM57508_NPAR, + BCM57504_NPAR, + 
BCM57502_NPAR, + BCM58802, + BCM58804, + BCM58808, + NETXTREME_E_VF, + NETXTREME_C_VF, + NETXTREME_S_VF, + NETXTREME_C_VF_HV, + NETXTREME_E_VF_HV, + NETXTREME_E_P5_VF, + NETXTREME_E_P5_VF_HV, +}; + struct bnxt { void __iomem *bar0; void __iomem *bar1; @@ -2049,6 +2097,7 @@ struct bnxt { struct list_head tc_indr_block_list; struct dentry *debugfs_pdev; struct device *hwmon_dev; + enum board_idx board_idx; }; #define BNXT_NUM_RX_RING_STATS 8 @@ -2219,5 +2268,5 @@ int bnxt_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid); void bnxt_dim_work(struct work_struct *work); int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi); - +void bnxt_print_device_info(struct bnxt *bp); #endif From d900aadd86b0c9ddb8b78e5fa512fb4133b30559 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:39 -0400 Subject: [PATCH 19/38] bnxt_en: refactor cancellation of resource reservations Resource reservations will also need to be reset after FUNC_DRV_UNRGTR in the following devlink driver_reinit patch. Extract this logic into a reusable function. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 47 ++++++++++++++--------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8ff398525488..8471e47d0480 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9691,6 +9691,33 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } +int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset) +{ + struct bnxt_hw_resc *hw_resc = &bp->hw_resc; + int rc; + + if (!BNXT_NEW_RM(bp)) + return 0; /* no resource reservations required */ + + rc = bnxt_hwrm_func_resc_qcaps(bp, true); + if (rc) + netdev_err(bp->dev, "resc_qcaps failed\n"); + + hw_resc->resv_cp_rings = 0; + hw_resc->resv_stat_ctxs = 0; + hw_resc->resv_irqs = 0; + hw_resc->resv_tx_rings = 0; + hw_resc->resv_rx_rings = 0; + hw_resc->resv_hw_ring_grps = 0; + hw_resc->resv_vnics = 0; + if (!fw_reset) { + bp->tx_nr_rings = 0; + bp->rx_nr_rings = 0; + } + + return rc; +} + static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp; @@ -9774,25 +9801,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) return rc; } } - if (BNXT_NEW_RM(bp)) { - struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - - rc = bnxt_hwrm_func_resc_qcaps(bp, true); - if (rc) - netdev_err(bp->dev, "resc_qcaps failed\n"); - - hw_resc->resv_cp_rings = 0; - hw_resc->resv_stat_ctxs = 0; - hw_resc->resv_irqs = 0; - hw_resc->resv_tx_rings = 0; - hw_resc->resv_rx_rings = 0; - hw_resc->resv_hw_ring_grps = 0; - hw_resc->resv_vnics = 0; - if (!fw_reset) { - bp->tx_nr_rings = 0; - bp->rx_nr_rings = 0; - } - } + rc = bnxt_cancel_reservations(bp, fw_reset); } return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 55da89cb62b5..5ca4f19936c3 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2246,6 +2246,7 @@ void bnxt_tx_enable(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); From 228ea8c187d814e1b8e369086e640dfc1d42974f Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:40 -0400 Subject: [PATCH 20/38] bnxt_en: implement devlink dev reload driver_reinit The RTNL lock must be held between down and up to prevent interleaving state changes, especially since external state changes might release and allocate different driver resource subsets that would otherwise need to be tracked and carefully handled. If the down function fails, then devlink will not call the corresponding up function, thus the lock is released in the down error paths. v2: Don't use devlink_reload_disable() and devlink_reload_enable(). Instead, check that the netdev is not in unregistered state before proceeding with reload. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 +-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 97 +++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 2 - drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 3 + 5 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8471e47d0480..24a17ce35703 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -49,8 +49,6 @@ #include #include #include -#include -#include #include #include #include @@ -4603,7 +4601,7 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, return rc; } -static int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp) +int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp) { struct hwrm_func_drv_unrgtr_input *req; int rc; @@ -7144,7 +7142,7 @@ static void bnxt_free_ctx_pg_tbls(struct bnxt *bp, ctx_pg->nr_pages = 0; } -static void bnxt_free_ctx_mem(struct bnxt *bp) +void bnxt_free_ctx_mem(struct bnxt *bp) { struct bnxt_ctx_mem_info *ctx = bp->ctx; int i; @@ -9198,7 +9196,7 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info) } } -static void bnxt_report_link(struct bnxt *bp) +void bnxt_report_link(struct bnxt *bp) { if (bp->link_info.link_up) { const char *signal = ""; @@ -9643,8 +9641,6 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) return hwrm_req_send(bp, req); } -static int bnxt_fw_init_one(struct bnxt *bp); - static int bnxt_fw_reset_via_optee(struct bnxt *bp) { #ifdef CONFIG_TEE_BNXT_FW @@ -10279,7 +10275,7 @@ void bnxt_half_close_nic(struct bnxt *bp) bnxt_free_mem(bp, false); } -static void bnxt_reenable_sriov(struct bnxt *bp) +void bnxt_reenable_sriov(struct bnxt *bp) { if (BNXT_PF(bp)) { struct bnxt_pf_info *pf = &bp->pf; @@ -11950,7 +11946,7 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp) static int bnxt_probe_phy(struct bnxt *bp, 
bool fw_dflt); -static int bnxt_fw_init_one(struct bnxt *bp) +int bnxt_fw_init_one(struct bnxt *bp) { int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5ca4f19936c3..4a9bdab90c28 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2230,11 +2230,13 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); +int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); +void bnxt_free_ctx_mem(struct bnxt *bp); unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); @@ -2243,6 +2245,7 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp); +void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); @@ -2255,6 +2258,7 @@ int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); +void bnxt_reenable_sriov(struct bnxt *bp); int bnxt_close_nic(struct bnxt *, bool, bool); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, u32 *reg_buf); @@ -2262,6 +2266,7 @@ void bnxt_fw_exception(struct bnxt *bp); void bnxt_fw_reset(struct bnxt *bp); int 
bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); +int bnxt_fw_init_one(struct bnxt *bp); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); int bnxt_restore_pf_fw_resources(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 951c0c00cc95..d875469f72ce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -16,6 +16,8 @@ #include "bnxt_vfr.h" #include "bnxt_devlink.h" #include "bnxt_ethtool.h" +#include "bnxt_ulp.h" +#include "bnxt_ptp.h" static int bnxt_dl_flash_update(struct devlink *dl, @@ -280,6 +282,98 @@ void bnxt_dl_health_recovery_done(struct bnxt *bp) static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack); +static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + int rc = 0; + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { + if (BNXT_PF(bp) && bp->pf.active_vfs) { + NL_SET_ERR_MSG_MOD(extack, + "reload is unsupported when VFs are allocated\n"); + return -EOPNOTSUPP; + } + rtnl_lock(); + if (bp->dev->reg_state == NETREG_UNREGISTERED) { + rtnl_unlock(); + return -ENODEV; + } + bnxt_ulp_stop(bp); + if (netif_running(bp->dev)) { + rc = bnxt_close_nic(bp, true, true); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to close"); + dev_close(bp->dev); + rtnl_unlock(); + break; + } + } + bnxt_vf_reps_free(bp); + rc = bnxt_hwrm_func_drv_unrgtr(bp); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to deregister"); + if (netif_running(bp->dev)) + dev_close(bp->dev); + rtnl_unlock(); + break; + } + bnxt_cancel_reservations(bp, false); + bnxt_free_ctx_mem(bp); + kfree(bp->ctx); + bp->ctx = 
NULL; + break; + } + default: + rc = -EOPNOTSUPP; + } + + return rc; +} + +static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + int rc = 0; + + *actions_performed = 0; + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { + bnxt_fw_init_one(bp); + bnxt_vf_reps_alloc(bp); + if (netif_running(bp->dev)) + rc = bnxt_open_nic(bp, true, true); + bnxt_ulp_start(bp, rc); + if (!rc) { + bnxt_reenable_sriov(bp); + bnxt_ptp_reapply_pps(bp); + } + break; + } + default: + return -EOPNOTSUPP; + } + + if (!rc) { + bnxt_print_device_info(bp); + if (netif_running(bp->dev)) { + mutex_lock(&bp->link_lock); + bnxt_report_link(bp); + mutex_unlock(&bp->link_lock); + } + *actions_performed |= BIT(action); + } else if (netif_running(bp->dev)) { + dev_close(bp->dev); + } + rtnl_unlock(); + return rc; +} + static const struct devlink_ops bnxt_dl_ops = { #ifdef CONFIG_BNXT_SRIOV .eswitch_mode_set = bnxt_dl_eswitch_mode_set, @@ -287,6 +381,9 @@ static const struct devlink_ops bnxt_dl_ops = { #endif /* CONFIG_BNXT_SRIOV */ .info_get = bnxt_dl_info_get, .flash_update = bnxt_dl_flash_update, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = bnxt_dl_reload_down, + .reload_up = bnxt_dl_reload_up, }; static const struct devlink_ops bnxt_vf_dl_ops; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f0aa480799ca..8388be119f9a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -11,9 +11,7 @@ #include #include #include -#include #include -#include #include #include #include "bnxt_hsi.h" diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index fa5f05708e6d..7c528e1f8713 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -10,6 +10,9 @@ #ifndef BNXT_PTP_H #define BNXT_PTP_H +#include +#include + #define BNXT_PTP_GRC_WIN 6 #define BNXT_PTP_GRC_WIN_BASE 0x6000 From 8f6c5e4d1470499b8feff98353eb2920bd81635a Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:41 -0400 Subject: [PATCH 21/38] bnxt_en: implement devlink dev reload fw_activate Similar to reload driver_reinit, the RTNL lock is held across reload down and up to prevent interleaving state changes. But we need to subsequently release the RTNL lock while waiting for firmware reset to complete. Also keep a statistic on fw_activate resets initiated remotely from other functions. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 +++ .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 56 ++++++++++++++++++- .../net/ethernet/broadcom/bnxt/bnxt_devlink.h | 7 +++ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 2 + 6 files changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 24a17ce35703..cd5932c75997 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2134,7 +2134,9 @@ static int bnxt_async_event_process(struct bnxt *bp, bp->fw_reset_max_dsecs = le16_to_cpu(cmpl->timestamp_hi); if (!bp->fw_reset_max_dsecs) bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS; - if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { + if (EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1)) { + set_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state); + } else if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { fatal_str = "fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); } @@ -12149,6 +12151,9 @@ static void 
bnxt_fw_reset_task(struct work_struct *work) } } clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + if (test_and_clear_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state) && + !test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state)) + bnxt_dl_remote_reload(bp); if (pci_enable_device(bp->pdev)) { netdev_err(bp->dev, "Cannot re-enable PCI device\n"); rc = -ENODEV; @@ -12200,6 +12205,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_ptp_reapply_pps(bp); bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); + clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); rtnl_unlock(); break; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4a9bdab90c28..38c23b4106a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -489,6 +489,11 @@ struct rx_tpa_end_cmp_ext { ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\ ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL) +#define EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1) \ + (((data1) & \ + ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\ + ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION) + #define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1) \ !!((data1) & \ ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC) @@ -1888,6 +1893,8 @@ struct bnxt { #define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 #define BNXT_STATE_NAPI_DISABLED 9 +#define BNXT_STATE_FW_ACTIVATE 11 +#define BNXT_STATE_FW_ACTIVATE_RESET 14 #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index d875469f72ce..9922c1428129 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -327,6 +327,30 @@ static int 
bnxt_dl_reload_down(struct devlink *dl, bool netns_change, bp->ctx = NULL; break; } + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: { + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET) { + NL_SET_ERR_MSG_MOD(extack, "Device not capable, requires reboot"); + return -EOPNOTSUPP; + } + rtnl_lock(); + if (bp->dev->reg_state == NETREG_UNREGISTERED) { + rtnl_unlock(); + return -ENODEV; + } + if (netif_running(bp->dev)) + set_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + rc = bnxt_hwrm_firmware_reset(bp->dev, + FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP, + FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP, + FW_RESET_REQ_FLAGS_RESET_GRACEFUL | + FW_RESET_REQ_FLAGS_FW_ACTIVATION); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to activate firmware"); + clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + rtnl_unlock(); + } + break; + } default: rc = -EOPNOTSUPP; } @@ -355,6 +379,35 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti } break; } + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: { + unsigned long start = jiffies; + unsigned long timeout = start + BNXT_DFLT_FW_RST_MAX_DSECS * HZ / 10; + + if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) + timeout = start + bp->fw_health->normal_func_wait_dsecs * HZ / 10; + if (!netif_running(bp->dev)) + NL_SET_ERR_MSG_MOD(extack, + "Device is closed, not waiting for reset notice that will never come"); + rtnl_unlock(); + while (test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state)) { + if (time_after(jiffies, timeout)) { + NL_SET_ERR_MSG_MOD(extack, "Activation incomplete"); + rc = -ETIMEDOUT; + break; + } + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + NL_SET_ERR_MSG_MOD(extack, "Activation aborted"); + rc = -ENODEV; + break; + } + msleep(50); + } + rtnl_lock(); + if (!rc) + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + break; + } default: return -EOPNOTSUPP; } @@ -381,7 +434,8 @@ static const struct devlink_ops bnxt_dl_ops = { #endif /* CONFIG_BNXT_SRIOV */ .info_get = 
bnxt_dl_info_get, .flash_update = bnxt_dl_flash_update, - .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = bnxt_dl_reload_down, .reload_up = bnxt_dl_reload_up, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index 406dc655a5fc..a189cfe1e441 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -20,6 +20,13 @@ static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) return ((struct bnxt_dl *)devlink_priv(dl))->bp; } +static inline void bnxt_dl_remote_reload(struct bnxt *bp) +{ + devlink_remote_reload_actions_performed(bp->dl, 0, + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); +} + #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 #define NVM_OFF_IGNORE_ARI 164 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fbb56b1f70fd..ac8df5c6906f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2180,8 +2180,8 @@ static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, return rc; } -static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type, - u8 self_reset, u8 flags) +int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type, + u8 self_reset, u8 flags) { struct bnxt *bp = netdev_priv(dev); struct hwrm_fw_reset_input *req; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index 0a57cb6a4a4b..bbf184c63b0a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -94,6 +94,8 @@ u32 bnxt_fw_to_ethtool_speed(u16); u16 
bnxt_get_fw_auto_link_speeds(u32); int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp, struct hwrm_nvm_get_dev_info_output *nvm_dev_info); +int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type, + u8 self_reset, u8 flags); int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw, u32 install_type); void bnxt_ethtool_init(struct bnxt *bp); From 892a662f04736ba40e241c794b15f1b2ee489dc3 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:42 -0400 Subject: [PATCH 22/38] bnxt_en: add enable_remote_dev_reset devlink parameter The reported parameter value should not take into account the state of remote drivers. Firmware will reject remote resets as appropriate, thus it is not strictly necessary to check HOT_RESET_ALLOWED before attempting to initiate a reset. But we add the check so that we can provide more intuitive messages when reset is not permitted. This firmware setting needs to be restored from all functions after a firmware reset. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 74 ++++++++++++++++++- .../net/ethernet/broadcom/bnxt/bnxt_devlink.h | 11 +++ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 ++ 5 files changed, 111 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cd5932c75997..80fff3d8b31f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7476,6 +7476,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_PTP_PPS; + if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT)) + bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && @@ -12010,6 +12012,27 @@ static void bnxt_fw_reset_writel(struct bnxt *bp, int reg_idx) } } +bool bnxt_hwrm_reset_permitted(struct bnxt *bp) +{ + struct hwrm_func_qcfg_output *resp; + struct hwrm_func_qcfg_input *req; + bool result = true; /* firmware will enforce if unknown */ + + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF) + return result; + + if (hwrm_req_init(bp, req, HWRM_FUNC_QCFG)) + return result; + + req->fid = cpu_to_le16(0xffff); + resp = hwrm_req_hold(bp, req); + if (!hwrm_req_send(bp, req)) + result = !!(le16_to_cpu(resp->flags) & + FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED); + hwrm_req_drop(bp, req); + return result; +} + static void bnxt_reset_all(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 38c23b4106a1..e56f2a27c67a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ 
-1935,6 +1935,7 @@ struct bnxt { #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 #define BNXT_FW_CAP_PTP_PPS 0x10000000 + #define BNXT_FW_CAP_HOT_RESET_IF 0x20000000 #define BNXT_FW_CAP_RING_MONITOR 0x40000000 #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) @@ -2274,6 +2275,7 @@ void bnxt_fw_reset(struct bnxt *bp); int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); int bnxt_fw_init_one(struct bnxt *bp); +bool bnxt_hwrm_reset_permitted(struct bnxt *bp); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); int bnxt_restore_pf_fw_resources(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 9922c1428129..8673f3c4b581 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -42,6 +42,26 @@ bnxt_dl_flash_update(struct devlink *dl, return rc; } +static int bnxt_hwrm_remote_dev_reset_set(struct bnxt *bp, bool remote_reset) +{ + struct hwrm_func_cfg_input *req; + int rc; + + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF) + return -EOPNOTSUPP; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_HOT_RESET_IF_SUPPORT); + if (remote_reset) + req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_HOT_RESET_IF_EN_DIS); + + return hwrm_req_send(bp, req); +} + static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack) @@ -272,11 +292,13 @@ void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy) void bnxt_dl_health_recovery_done(struct bnxt *bp) { struct bnxt_fw_health *hlth = bp->fw_health; + struct bnxt_dl *dl = devlink_priv(bp->dl); if (hlth->fatal) 
devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); else devlink_health_reporter_recovery_done(hlth->fw_reset_reporter); + bnxt_hwrm_remote_dev_reset_set(bp, dl->remote_reset); } static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, @@ -332,6 +354,11 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "Device not capable, requires reboot"); return -EOPNOTSUPP; } + if (!bnxt_hwrm_reset_permitted(bp)) { + NL_SET_ERR_MSG_MOD(extack, + "Reset denied by firmware, it may be inhibited by remote driver"); + return -EPERM; + } rtnl_lock(); if (bp->dev->reg_state == NETREG_UNREGISTERED) { rtnl_unlock(); @@ -863,6 +890,32 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id, return 0; } +static int bnxt_remote_dev_reset_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF) + return -EOPNOTSUPP; + + ctx->val.vbool = bnxt_dl_get_remote_reset(dl); + return 0; +} + +static int bnxt_remote_dev_reset_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + int rc; + + rc = bnxt_hwrm_remote_dev_reset_set(bp, ctx->val.vbool); + if (rc) + return rc; + + bnxt_dl_set_remote_reset(dl, ctx->val.vbool); + return rc; +} + static const struct devlink_param bnxt_dl_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), @@ -885,17 +938,25 @@ static const struct devlink_param bnxt_dl_params[] = { BIT(DEVLINK_PARAM_CMODE_PERMANENT), bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, NULL), + /* keep REMOTE_DEV_RESET last, it is excluded based on caps */ + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + bnxt_remote_dev_reset_get, + bnxt_remote_dev_reset_set, NULL), }; static int bnxt_dl_params_register(struct bnxt *bp) { + int num_params = ARRAY_SIZE(bnxt_dl_params); int 
rc; if (bp->hwrm_spec_code < 0x10600) return 0; - rc = devlink_params_register(bp->dl, bnxt_dl_params, - ARRAY_SIZE(bnxt_dl_params)); + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF) + num_params--; + + rc = devlink_params_register(bp->dl, bnxt_dl_params, num_params); if (rc) netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n", rc); @@ -904,11 +965,15 @@ static int bnxt_dl_params_register(struct bnxt *bp) static void bnxt_dl_params_unregister(struct bnxt *bp) { + int num_params = ARRAY_SIZE(bnxt_dl_params); + if (bp->hwrm_spec_code < 0x10600) return; - devlink_params_unregister(bp->dl, bnxt_dl_params, - ARRAY_SIZE(bnxt_dl_params)); + if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET_IF) + num_params--; + + devlink_params_unregister(bp->dl, bnxt_dl_params, num_params); } int bnxt_dl_register(struct bnxt *bp) @@ -933,6 +998,7 @@ int bnxt_dl_register(struct bnxt *bp) bp->dl = dl; bp_dl = devlink_priv(dl); bp_dl->bp = bp; + bnxt_dl_set_remote_reset(dl, true); /* Add switchdev eswitch mode setting, if SRIOV supported */ if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV) && diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index a189cfe1e441..456e18c4badf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -13,6 +13,7 @@ /* Struct to hold housekeeping info needed by devlink interface */ struct bnxt_dl { struct bnxt *bp; /* back ptr to the controlling dev */ + bool remote_reset; }; static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) @@ -27,6 +28,16 @@ static inline void bnxt_dl_remote_reload(struct bnxt *bp) BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); } +static inline bool bnxt_dl_get_remote_reset(struct devlink *dl) +{ + return ((struct bnxt_dl *)devlink_priv(dl))->remote_reset; +} + +static inline void bnxt_dl_set_remote_reset(struct devlink *dl, bool value) +{ + ((struct bnxt_dl *)devlink_priv(dl))->remote_reset = 
value; +} + #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 #define NVM_OFF_IGNORE_ARI 164 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ac8df5c6906f..15c518024965 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2187,6 +2187,11 @@ int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type, struct hwrm_fw_reset_input *req; int rc; + if (!bnxt_hwrm_reset_permitted(bp)) { + netdev_warn(bp->dev, "Reset denied by firmware, it may be inhibited by remote driver"); + return -EPERM; + } + rc = hwrm_req_init(bp, req, HWRM_FW_RESET); if (rc) return rc; From 1596847d0f7b00147c4cb01158325d72c096cdde Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:43 -0400 Subject: [PATCH 23/38] bnxt_en: improve error recovery information messages The recovery election messages are often mistaken for errors. Improve the wording to clarify the meaning of these frequent and expected events. Also, take the first step towards more inclusive language. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 ++++++++++++-------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 80fff3d8b31f..517ce16ff7eb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2150,17 +2150,18 @@ static int bnxt_async_event_process(struct bnxt *bp, } case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { struct bnxt_fw_health *fw_health = bp->fw_health; + char *status_desc = "healthy"; + u32 status; if (!fw_health) goto async_event_process_exit; if (!EVENT_DATA1_RECOVERY_ENABLED(data1)) { fw_health->enabled = false; - netif_info(bp, drv, bp->dev, - "Error recovery info: error recovery[0]\n"); + netif_info(bp, drv, bp->dev, "Driver recovery watchdog is disabled\n"); break; } - fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1); + fw_health->primary = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1); fw_health->tmr_multiplier = DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); @@ -2170,10 +2171,13 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); fw_health->last_fw_reset_cnt = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (status != BNXT_FW_STATUS_HEALTHY) + status_desc = "unhealthy"; netif_info(bp, drv, bp->dev, - "Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n", - fw_health->master, fw_health->last_fw_reset_cnt, - bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG)); + "Driver recovery watchdog, role: %s, firmware status: 0x%x (%s), resets: %u\n", + fw_health->primary ? 
"primary" : "backup", status, + status_desc, fw_health->last_fw_reset_cnt); if (!fw_health->enabled) { /* Make sure tmr_counter is set and visible to * bnxt_health_check() before setting enabled to true. @@ -11469,7 +11473,7 @@ static void bnxt_force_fw_reset(struct bnxt *bp) } bnxt_fw_reset_close(bp); wait_dsecs = fw_health->master_func_wait_dsecs; - if (fw_health->master) { + if (fw_health->primary) { if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) wait_dsecs = 0; bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW; @@ -12141,7 +12145,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) return; } - if (!bp->fw_health->master) { + if (!bp->fw_health->primary) { u32 wait_dsecs = bp->fw_health->normal_func_wait_dsecs; bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e56f2a27c67a..16d33d00973e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1536,7 +1536,7 @@ struct bnxt_fw_health { u32 last_fw_heartbeat; u32 last_fw_reset_cnt; u8 enabled:1; - u8 master:1; + u8 primary:1; u8 fatal:1; u8 status_reliable:1; u8 tmr_multiplier; From aadb0b1a0b3628291dff2dab8c8af1b63df1cae9 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:44 -0400 Subject: [PATCH 24/38] bnxt_en: remove fw_reset devlink health reporter Firmware resets initiated by the user are not errors and should not be reported via devlink. Once only unsolicited resets remain, it is no longer sensible to maintain a separate fw_reset reporter. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 33 +++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 12 +- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 118 ++++-------------- .../net/ethernet/broadcom/bnxt/bnxt_devlink.h | 6 +- 4 files changed, 53 insertions(+), 116 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 517ce16ff7eb..1251d78ffd46 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2122,7 +2122,7 @@ static int bnxt_async_event_process(struct bnxt *bp, set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event); break; case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { - char *fatal_str = "non-fatal"; + char *type_str = "Solicited"; if (!bp->fw_health) goto async_event_process_exit; @@ -2137,12 +2137,16 @@ static int bnxt_async_event_process(struct bnxt *bp, if (EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1)) { set_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state); } else if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { - fatal_str = "fatal"; + type_str = "Fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + } else if (data2 && BNXT_FW_STATUS_HEALTHY != + EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2)) { + type_str = "Non-fatal"; + set_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state); } netif_warn(bp, hw, bp->dev, - "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", - fatal_str, data1, data2, + "%s firmware reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + type_str, data1, data2, bp->fw_reset_min_dsecs * 100, bp->fw_reset_max_dsecs * 100); set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); @@ -11737,13 +11741,17 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event)) bnxt_rx_ring_reset(bp); - if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event)) - bnxt_devlink_health_report(bp, 
BNXT_FW_RESET_NOTIFY_SP_EVENT); + if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event)) { + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) || + test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state)) + bnxt_devlink_health_fw_report(bp); + else + bnxt_fw_reset(bp); + } if (test_and_clear_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event)) { if (!is_bnxt_fw_ok(bp)) - bnxt_devlink_health_report(bp, - BNXT_FW_EXCEPTION_SP_EVENT); + bnxt_devlink_health_fw_report(bp); } smp_mb__before_atomic(); @@ -12079,7 +12087,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { bnxt_ulp_start(bp, rc); - bnxt_dl_health_status_update(bp, false); + bnxt_dl_health_fw_status_update(bp, false); } bp->fw_reset_state = 0; dev_close(bp->dev); @@ -12178,6 +12186,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) } } clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + clear_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state); if (test_and_clear_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state) && !test_bit(BNXT_STATE_FW_ACTIVATE, &bp->state)) bnxt_dl_remote_reload(bp); @@ -12230,9 +12239,11 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); bnxt_ptp_reapply_pps(bp); - bnxt_dl_health_recovery_done(bp); - bnxt_dl_health_status_update(bp, true); clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + if (test_and_clear_bit(BNXT_STATE_RECOVER, &bp->state)) { + bnxt_dl_health_fw_recovery_done(bp); + bnxt_dl_health_fw_status_update(bp, true); + } rtnl_unlock(); break; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 16d33d00973e..e640df62d296 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -494,6 +494,10 @@ struct rx_tpa_end_cmp_ext { ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\ 
ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION) +#define EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2) \ + ((data2) & \ + ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK) + #define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1) \ !!((data1) & \ ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC) @@ -1537,7 +1541,6 @@ struct bnxt_fw_health { u32 last_fw_reset_cnt; u8 enabled:1; u8 primary:1; - u8 fatal:1; u8 status_reliable:1; u8 tmr_multiplier; u8 tmr_counter; @@ -1548,14 +1551,9 @@ struct bnxt_fw_health { u32 echo_req_data1; u32 echo_req_data2; struct devlink_health_reporter *fw_reporter; - struct devlink_health_reporter *fw_reset_reporter; struct devlink_health_reporter *fw_fatal_reporter; }; -struct bnxt_fw_reporter_ctx { - unsigned long sp_event; -}; - #define BNXT_FW_HEALTH_REG_TYPE_MASK 3 #define BNXT_FW_HEALTH_REG_TYPE_CFG 0 #define BNXT_FW_HEALTH_REG_TYPE_GRC 1 @@ -1894,6 +1892,8 @@ struct bnxt { #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 #define BNXT_STATE_NAPI_DISABLED 9 #define BNXT_STATE_FW_ACTIVATE 11 +#define BNXT_STATE_RECOVER 12 +#define BNXT_STATE_FW_NON_FATAL_COND 13 #define BNXT_STATE_FW_ACTIVATE_RESET 14 #define BNXT_NO_FW_ACCESS(bp) \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 8673f3c4b581..2c72f3b3708f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -19,6 +19,15 @@ #include "bnxt_ulp.h" #include "bnxt_ptp.h" +static void __bnxt_fw_recover(struct bnxt *bp) +{ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) || + test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state)) + bnxt_fw_reset(bp); + else + bnxt_fw_exception(bp); +} + static int bnxt_dl_flash_update(struct devlink *dl, struct devlink_flash_update_params *params, @@ -106,42 +115,14 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { .diagnose = bnxt_fw_reporter_diagnose, }; 
-static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter, - void *priv_ctx, - struct netlink_ext_ack *extack) -{ - struct bnxt *bp = devlink_health_reporter_priv(reporter); - - if (!priv_ctx) - return -EOPNOTSUPP; - - bnxt_fw_reset(bp); - return -EINPROGRESS; -} - -static const -struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = { - .name = "fw_reset", - .recover = bnxt_fw_reset_recover, -}; - static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, void *priv_ctx, struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); - struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; - unsigned long event; - if (!priv_ctx) - return -EOPNOTSUPP; - - bp->fw_health->fatal = true; - event = fw_reporter_ctx->sp_event; - if (event == BNXT_FW_RESET_NOTIFY_SP_EVENT) - bnxt_fw_reset(bp); - else if (event == BNXT_FW_EXCEPTION_SP_EVENT) - bnxt_fw_exception(bp); + set_bit(BNXT_STATE_RECOVER, &bp->state); + __bnxt_fw_recover(bp); return -EINPROGRESS; } @@ -159,24 +140,6 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp) if (!health) return; - if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter) - goto err_recovery; - - health->fw_reset_reporter = - devlink_health_reporter_create(bp->dl, - &bnxt_dl_fw_reset_reporter_ops, - 0, bp); - if (IS_ERR(health->fw_reset_reporter)) { - netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n", - PTR_ERR(health->fw_reset_reporter)); - health->fw_reset_reporter = NULL; - bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET; - } - -err_recovery: - if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) - return; - if (!health->fw_reporter) { health->fw_reporter = devlink_health_reporter_create(bp->dl, @@ -186,7 +149,6 @@ err_recovery: netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n", PTR_ERR(health->fw_reporter)); health->fw_reporter = NULL; - bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; return; } } @@ -213,12 +175,6 @@ void 
bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all) if (!health) return; - if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) && - health->fw_reset_reporter) { - devlink_health_reporter_destroy(health->fw_reset_reporter); - health->fw_reset_reporter = NULL; - } - if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all) return; @@ -233,43 +189,23 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all) } } -void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event) +void bnxt_devlink_health_fw_report(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; - struct bnxt_fw_reporter_ctx fw_reporter_ctx; - fw_reporter_ctx.sp_event = event; - switch (event) { - case BNXT_FW_RESET_NOTIFY_SP_EVENT: - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { - if (!fw_health->fw_fatal_reporter) - return; - - devlink_health_report(fw_health->fw_fatal_reporter, - "FW fatal async event received", - &fw_reporter_ctx); - return; - } - if (!fw_health->fw_reset_reporter) - return; - - devlink_health_report(fw_health->fw_reset_reporter, - "FW non-fatal reset event received", - &fw_reporter_ctx); + if (!fw_health) return; - case BNXT_FW_EXCEPTION_SP_EVENT: - if (!fw_health->fw_fatal_reporter) - return; - - devlink_health_report(fw_health->fw_fatal_reporter, - "FW fatal error reported", - &fw_reporter_ctx); + if (!fw_health->fw_fatal_reporter) { + __bnxt_fw_recover(bp); return; } + + devlink_health_report(fw_health->fw_fatal_reporter, + "FW fatal error reported", NULL); } -void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy) +void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy) { struct bnxt_fw_health *health = bp->fw_health; u8 state; @@ -279,25 +215,15 @@ void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy) else state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - if (health->fatal) - devlink_health_reporter_state_update(health->fw_fatal_reporter, - state); - else - 
devlink_health_reporter_state_update(health->fw_reset_reporter, - state); - - health->fatal = false; + devlink_health_reporter_state_update(health->fw_fatal_reporter, state); } -void bnxt_dl_health_recovery_done(struct bnxt *bp) +void bnxt_dl_health_fw_recovery_done(struct bnxt *bp) { struct bnxt_fw_health *hlth = bp->fw_health; struct bnxt_dl *dl = devlink_priv(bp->dl); - if (hlth->fatal) - devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); - else - devlink_health_reporter_recovery_done(hlth->fw_reset_reporter); + devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); bnxt_hwrm_remote_dev_reset_set(bp, dl->remote_reset); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index 456e18c4badf..a715458abc30 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -71,9 +71,9 @@ enum bnxt_dl_version_type { BNXT_VERSION_STORED, }; -void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event); -void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy); -void bnxt_dl_health_recovery_done(struct bnxt *bp); +void bnxt_devlink_health_fw_report(struct bnxt *bp); +void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy); +void bnxt_dl_health_fw_recovery_done(struct bnxt *bp); void bnxt_dl_fw_reporters_create(struct bnxt *bp); void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all); int bnxt_dl_register(struct bnxt *bp); From 2bb21b8db5c0e515549d7d1d0de5dc905a32a338 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:45 -0400 Subject: [PATCH 25/38] bnxt_en: consolidate fw devlink health reporters Merge 'fw' and 'fw_fatal' health reporters. There is no longer a need to distinguish between firmware reporters. Only bona fide errors are reported now and no reports were being generated for the 'fw' reporter.
Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 - .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 71 ++++++------------- 2 files changed, 21 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e640df62d296..2873f600a7dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1551,7 +1551,6 @@ struct bnxt_fw_health { u32 echo_req_data1; u32 echo_req_data2; struct devlink_health_reporter *fw_reporter; - struct devlink_health_reporter *fw_fatal_reporter; }; #define BNXT_FW_HEALTH_REG_TYPE_MASK 3 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 2c72f3b3708f..a295d2042b6e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -71,9 +71,9 @@ static int bnxt_hwrm_remote_dev_reset_set(struct bnxt *bp, bool remote_reset) return hwrm_req_send(bp, req); } -static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static int bnxt_fw_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); u32 val; @@ -110,14 +110,9 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, return 0; } -static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { - .name = "fw", - .diagnose = bnxt_fw_reporter_diagnose, -}; - -static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, - void *priv_ctx, - struct netlink_ext_ack *extack) +static int bnxt_fw_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = 
devlink_health_reporter_priv(reporter); @@ -127,43 +122,26 @@ static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, return -EINPROGRESS; } -static const -struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = { - .name = "fw_fatal", - .recover = bnxt_fw_fatal_recover, +static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { + .name = "fw", + .diagnose = bnxt_fw_diagnose, + .recover = bnxt_fw_recover, }; void bnxt_dl_fw_reporters_create(struct bnxt *bp) { struct bnxt_fw_health *health = bp->fw_health; - if (!health) + if (!health || health->fw_reporter) return; - if (!health->fw_reporter) { - health->fw_reporter = - devlink_health_reporter_create(bp->dl, - &bnxt_dl_fw_reporter_ops, - 0, bp); - if (IS_ERR(health->fw_reporter)) { - netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n", - PTR_ERR(health->fw_reporter)); - health->fw_reporter = NULL; - return; - } - } - - if (health->fw_fatal_reporter) - return; - - health->fw_fatal_reporter = - devlink_health_reporter_create(bp->dl, - &bnxt_dl_fw_fatal_reporter_ops, + health->fw_reporter = + devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops, 0, bp); - if (IS_ERR(health->fw_fatal_reporter)) { - netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n", - PTR_ERR(health->fw_fatal_reporter)); - health->fw_fatal_reporter = NULL; + if (IS_ERR(health->fw_reporter)) { + netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n", + PTR_ERR(health->fw_reporter)); + health->fw_reporter = NULL; bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; } } @@ -182,11 +160,6 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all) devlink_health_reporter_destroy(health->fw_reporter); health->fw_reporter = NULL; } - - if (health->fw_fatal_reporter) { - devlink_health_reporter_destroy(health->fw_fatal_reporter); - health->fw_fatal_reporter = NULL; - } } void bnxt_devlink_health_fw_report(struct bnxt *bp) @@ -196,13 +169,12 @@ 
void bnxt_devlink_health_fw_report(struct bnxt *bp) if (!fw_health) return; - if (!fw_health->fw_fatal_reporter) { + if (!fw_health->fw_reporter) { __bnxt_fw_recover(bp); return; } - devlink_health_report(fw_health->fw_fatal_reporter, - "FW fatal error reported", NULL); + devlink_health_report(fw_health->fw_reporter, "FW error reported", NULL); } void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy) @@ -215,15 +187,14 @@ void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy) else state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - devlink_health_reporter_state_update(health->fw_fatal_reporter, state); + devlink_health_reporter_state_update(health->fw_reporter, state); } void bnxt_dl_health_fw_recovery_done(struct bnxt *bp) { - struct bnxt_fw_health *hlth = bp->fw_health; struct bnxt_dl *dl = devlink_priv(bp->dl); - devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); + devlink_health_reporter_recovery_done(bp->fw_health->fw_reporter); bnxt_hwrm_remote_dev_reset_set(bp, dl->remote_reset); } From 8cc95ceb7087d6910050286301d05f4824a0bf59 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:46 -0400 Subject: [PATCH 26/38] bnxt_en: improve fw diagnose devlink health messages Add firmware event counters as well as health state severity. In the unhealthy state, recommend a remedy and inform the user as to its impact. Readability of the devlink tool's output is negatively impacted by adding these fields to the diagnosis. The single line of text, as rendered by devlink health diagnose, benefits from more terse descriptions, which can be substituted without loss of clarity, even in pretty printed JSON mode. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 25 ++++ .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 135 ++++++++++++++---- 3 files changed, 150 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1251d78ffd46..b4d9374548f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2138,10 +2138,12 @@ static int bnxt_async_event_process(struct bnxt *bp, set_bit(BNXT_STATE_FW_ACTIVATE_RESET, &bp->state); } else if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { type_str = "Fatal"; + bp->fw_health->fatalities++; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); } else if (data2 && BNXT_FW_STATUS_HEALTHY != EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2)) { type_str = "Non-fatal"; + bp->fw_health->survivals++; set_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state); } netif_warn(bp, hw, bp->dev, @@ -7604,6 +7606,7 @@ static int __bnxt_alloc_fw_health(struct bnxt *bp) if (!bp->fw_health) return -ENOMEM; + mutex_init(&bp->fw_health->lock); return 0; } @@ -7650,12 +7653,16 @@ static void bnxt_inv_fw_health_reg(struct bnxt *bp) struct bnxt_fw_health *fw_health = bp->fw_health; u32 reg_type; - if (!fw_health || !fw_health->status_reliable) + if (!fw_health) return; reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]); if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) fw_health->status_reliable = false; + + reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_RESET_CNT_REG]); + if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) + fw_health->resets_reliable = false; } static void bnxt_try_map_fw_health_reg(struct bnxt *bp) @@ -7712,6 +7719,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) int i; bp->fw_health->status_reliable = false; + bp->fw_health->resets_reliable = false; /* Only pre-map the monitoring GRC registers using window 3 */ for (i = 0; i < 4; i++) { u32 reg = 
fw_health->regs[i]; @@ -7725,6 +7733,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg); } bp->fw_health->status_reliable = true; + bp->fw_health->resets_reliable = true; if (reg_base == 0xffffffff) return 0; @@ -11264,14 +11273,18 @@ static void bnxt_fw_health_check(struct bnxt *bp) } val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); - if (val == fw_health->last_fw_heartbeat) + if (val == fw_health->last_fw_heartbeat) { + fw_health->arrests++; goto fw_reset; + } fw_health->last_fw_heartbeat = val; val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - if (val != fw_health->last_fw_reset_cnt) + if (val != fw_health->last_fw_reset_cnt) { + fw_health->discoveries++; goto fw_reset; + } fw_health->tmr_counter = fw_health->tmr_multiplier; return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2873f600a7dd..bbbc63e882d1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1523,6 +1523,21 @@ struct bnxt_ctx_mem_info { struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX]; }; +enum bnxt_health_severity { + SEVERITY_NORMAL = 0, + SEVERITY_WARNING, + SEVERITY_RECOVERABLE, + SEVERITY_FATAL, +}; + +enum bnxt_health_remedy { + REMEDY_DEVLINK_RECOVER, + REMEDY_POWER_CYCLE_DEVICE, + REMEDY_POWER_CYCLE_HOST, + REMEDY_FW_UPDATE, + REMEDY_HW_REPLACE, +}; + struct bnxt_fw_health { u32 flags; u32 polling_dsecs; @@ -1542,6 +1557,7 @@ struct bnxt_fw_health { u8 enabled:1; u8 primary:1; u8 status_reliable:1; + u8 resets_reliable:1; u8 tmr_multiplier; u8 tmr_counter; u8 fw_reset_seq_cnt; @@ -1551,6 +1567,15 @@ struct bnxt_fw_health { u32 echo_req_data1; u32 echo_req_data2; struct devlink_health_reporter *fw_reporter; + /* Protects severity and remedy */ + struct mutex lock; + enum bnxt_health_severity severity; + enum bnxt_health_remedy remedy; + u32 arrests; + u32 discoveries; + u32 survivals; + u32 fatalities; 
+ u32 diagnoses; }; #define BNXT_FW_HEALTH_REG_TYPE_MASK 3 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a295d2042b6e..930cbf1ca4e0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -71,43 +71,110 @@ static int bnxt_hwrm_remote_dev_reset_set(struct bnxt *bp, bool remote_reset) return hwrm_req_send(bp, req); } +static char *bnxt_health_severity_str(enum bnxt_health_severity severity) +{ + switch (severity) { + case SEVERITY_NORMAL: return "normal"; + case SEVERITY_WARNING: return "warning"; + case SEVERITY_RECOVERABLE: return "recoverable"; + case SEVERITY_FATAL: return "fatal"; + default: return "unknown"; + } +} + +static char *bnxt_health_remedy_str(enum bnxt_health_remedy remedy) +{ + switch (remedy) { + case REMEDY_DEVLINK_RECOVER: return "devlink recover"; + case REMEDY_POWER_CYCLE_DEVICE: return "device power cycle"; + case REMEDY_POWER_CYCLE_HOST: return "host power cycle"; + case REMEDY_FW_UPDATE: return "update firmware"; + case REMEDY_HW_REPLACE: return "replace hardware"; + default: return "unknown"; + } +} + static int bnxt_fw_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); - u32 val; + struct bnxt_fw_health *h = bp->fw_health; + u32 fw_status, fw_resets; int rc; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) - return 0; + return devlink_fmsg_string_pair_put(fmsg, "Status", "recovering"); - val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (!h->status_reliable) + return devlink_fmsg_string_pair_put(fmsg, "Status", "unknown"); - if (BNXT_FW_IS_BOOTING(val)) { - rc = devlink_fmsg_string_pair_put(fmsg, "Description", - "Not yet completed initialization"); + mutex_lock(&h->lock); + fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (BNXT_FW_IS_BOOTING(fw_status)) { + rc = 
devlink_fmsg_string_pair_put(fmsg, "Status", "initializing"); if (rc) - return rc; - } else if (BNXT_FW_IS_ERR(val)) { - rc = devlink_fmsg_string_pair_put(fmsg, "Description", - "Encountered fatal error and cannot recover"); + goto unlock; + } else if (h->severity || fw_status != BNXT_FW_STATUS_HEALTHY) { + if (!h->severity) { + h->severity = SEVERITY_FATAL; + h->remedy = REMEDY_POWER_CYCLE_DEVICE; + h->diagnoses++; + devlink_health_report(h->fw_reporter, + "FW error diagnosed", h); + } + rc = devlink_fmsg_string_pair_put(fmsg, "Status", "error"); if (rc) - return rc; + goto unlock; + rc = devlink_fmsg_u32_pair_put(fmsg, "Syndrome", fw_status); + if (rc) + goto unlock; + } else { + rc = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy"); + if (rc) + goto unlock; } - if (val >> 16) { - rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16); - if (rc) - return rc; - } - - val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - rc = devlink_fmsg_u32_pair_put(fmsg, "Reset count", val); + rc = devlink_fmsg_string_pair_put(fmsg, "Severity", + bnxt_health_severity_str(h->severity)); if (rc) + goto unlock; + + if (h->severity) { + rc = devlink_fmsg_string_pair_put(fmsg, "Remedy", + bnxt_health_remedy_str(h->remedy)); + if (rc) + goto unlock; + if (h->remedy == REMEDY_DEVLINK_RECOVER) { + rc = devlink_fmsg_string_pair_put(fmsg, "Impact", + "traffic+ntuple_cfg"); + if (rc) + goto unlock; + } + } + +unlock: + mutex_unlock(&h->lock); + if (rc || !h->resets_reliable) return rc; - return 0; + fw_resets = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + rc = devlink_fmsg_u32_pair_put(fmsg, "Resets", fw_resets); + if (rc) + return rc; + rc = devlink_fmsg_u32_pair_put(fmsg, "Arrests", h->arrests); + if (rc) + return rc; + rc = devlink_fmsg_u32_pair_put(fmsg, "Survivals", h->survivals); + if (rc) + return rc; + rc = devlink_fmsg_u32_pair_put(fmsg, "Discoveries", h->discoveries); + if (rc) + return rc; + rc = devlink_fmsg_u32_pair_put(fmsg, "Fatalities", 
h->fatalities); + if (rc) + return rc; + return devlink_fmsg_u32_pair_put(fmsg, "Diagnoses", h->diagnoses); } static int bnxt_fw_recover(struct devlink_health_reporter *reporter, @@ -116,6 +183,9 @@ static int bnxt_fw_recover(struct devlink_health_reporter *reporter, { struct bnxt *bp = devlink_health_reporter_priv(reporter); + if (bp->fw_health->severity == SEVERITY_FATAL) + return -ENODEV; + set_bit(BNXT_STATE_RECOVER, &bp->state); __bnxt_fw_recover(bp); @@ -165,6 +235,7 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all) void bnxt_devlink_health_fw_report(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; + int rc; if (!fw_health) return; @@ -174,20 +245,32 @@ void bnxt_devlink_health_fw_report(struct bnxt *bp) return; } - devlink_health_report(fw_health->fw_reporter, "FW error reported", NULL); + mutex_lock(&fw_health->lock); + fw_health->severity = SEVERITY_RECOVERABLE; + fw_health->remedy = REMEDY_DEVLINK_RECOVER; + mutex_unlock(&fw_health->lock); + rc = devlink_health_report(fw_health->fw_reporter, "FW error reported", + fw_health); + if (rc == -ECANCELED) + __bnxt_fw_recover(bp); } void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy) { - struct bnxt_fw_health *health = bp->fw_health; + struct bnxt_fw_health *fw_health = bp->fw_health; u8 state; - if (healthy) + mutex_lock(&fw_health->lock); + if (healthy) { + fw_health->severity = SEVERITY_NORMAL; state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - else + } else { + fw_health->severity = SEVERITY_FATAL; + fw_health->remedy = REMEDY_POWER_CYCLE_DEVICE; state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - - devlink_health_reporter_state_update(health->fw_reporter, state); + } + mutex_unlock(&fw_health->lock); + devlink_health_reporter_state_update(fw_health->fw_reporter, state); } void bnxt_dl_health_fw_recovery_done(struct bnxt *bp) From 9a575c8c25ae2372112db6d6b3e553cd90e9f02b Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:47 -0400 Subject: [PATCH 
27/38] bnxt_en: Refactor coredump functions The coredump functionality will be used by devlink health. Refactor these functions that get coredump and coredump length. There is no functional change, but the following checkpatch warnings were addressed: - strscpy is preferred over strlcpy. - sscanf results should be checked, with an additional warning. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 48 ++++++++++++------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 15c518024965..fe832f97f905 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3808,7 +3808,7 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, record->low_version = 0; record->high_version = 1; record->asic_state = 0; - strlcpy(record->system_name, utsname()->nodename, + strscpy(record->system_name, utsname()->nodename, sizeof(record->system_name)); record->year = cpu_to_le16(tm.tm_year + 1900); record->month = cpu_to_le16(tm.tm_mon + 1); @@ -3820,11 +3820,12 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, strcpy(record->commandline, "ethtool -w"); record->total_segments = cpu_to_le32(total_segs); - sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor); + if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2) + netdev_warn(bp->dev, "Unknown OS release in coredump\n"); record->os_ver_major = cpu_to_le32(os_ver_major); record->os_ver_minor = cpu_to_le32(os_ver_minor); - strlcpy(record->os_name, utsname()->sysname, 32); + strscpy(record->os_name, utsname()->sysname, sizeof(record->os_name)); time64_to_tm(end, 0, &tm); record->end_year = cpu_to_le16(tm.tm_year + 1900); record->end_month = cpu_to_le16(tm.tm_mon + 1); @@ -3842,7 
+3843,7 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, record->ioctl_high_version = 0; } -static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len) +static int __bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len) { u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output); u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0; @@ -3945,6 +3946,30 @@ err: return rc; } +static int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len) +{ + if (dump_type == BNXT_DUMP_CRASH) { +#ifdef CONFIG_TEE_BNXT_FW + return tee_bnxt_copy_coredump(buf, 0, *dump_len); +#else + return -EOPNOTSUPP; +#endif + } else { + return __bnxt_get_coredump(bp, buf, dump_len); + } +} + +static u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type) +{ + u32 len = 0; + + if (dump_type == BNXT_DUMP_CRASH) + len = BNXT_CRASH_DUMP_LEN; + else + __bnxt_get_coredump(bp, NULL, &len); + return len; +} + static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump) { struct bnxt *bp = netdev_priv(dev); @@ -3976,10 +4001,7 @@ static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump) bp->ver_resp.hwrm_fw_rsvd_8b; dump->flag = bp->dump_flag; - if (bp->dump_flag == BNXT_DUMP_CRASH) - dump->len = BNXT_CRASH_DUMP_LEN; - else - bnxt_get_coredump(bp, NULL, &dump->len); + dump->len = bnxt_get_coredump_length(bp, bp->dump_flag); return 0; } @@ -3994,15 +4016,7 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump, memset(buf, 0, dump->len); dump->flag = bp->dump_flag; - if (dump->flag == BNXT_DUMP_CRASH) { -#ifdef CONFIG_TEE_BNXT_FW - return tee_bnxt_copy_coredump(buf, 0, dump->len); -#endif - } else { - return bnxt_get_coredump(bp, buf, &dump->len); - } - - return 0; + return bnxt_get_coredump(bp, dump->flag, buf, &dump->len); } static int bnxt_get_ts_info(struct net_device *dev, From b032228e58ea2477955058ad4d70a636ce1dec51 Mon Sep 17 00:00:00 2001 From: Edwin 
Peer Date: Fri, 29 Oct 2021 03:47:48 -0400 Subject: [PATCH 28/38] bnxt_en: move coredump functions into dedicated file Change bnxt_get_coredump() and bnxt_get_coredump_length() to non-static functions. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/Makefile | 2 +- .../ethernet/broadcom/bnxt/bnxt_coredump.c | 372 ++++++++++++++++++ .../ethernet/broadcom/bnxt/bnxt_coredump.h | 51 +++ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 356 ----------------- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 43 -- 5 files changed, 424 insertions(+), 400 deletions(-) create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index c6ef7ec2c115..2bc2b707d6ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o +bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_coredump.o bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c new file mode 100644 index 000000000000..3e23fce3771e --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -0,0 +1,372 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2021 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include "bnxt_hsi.h" +#include "bnxt.h" +#include "bnxt_hwrm.h" +#include "bnxt_coredump.h" + +static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, + struct bnxt_hwrm_dbg_dma_info *info) +{ + struct hwrm_dbg_cmn_input *cmn_req = msg; + __le16 *seq_ptr = msg + info->seq_off; + struct hwrm_dbg_cmn_output *cmn_resp; + u16 seq = 0, len, segs_off; + dma_addr_t dma_handle; + void *dma_buf, *resp; + int rc, off = 0; + + dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle); + if (!dma_buf) { + hwrm_req_drop(bp, msg); + return -ENOMEM; + } + + hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT); + cmn_resp = hwrm_req_hold(bp, msg); + resp = cmn_resp; + + segs_off = offsetof(struct hwrm_dbg_coredump_list_output, + total_segments); + cmn_req->host_dest_addr = cpu_to_le64(dma_handle); + cmn_req->host_buf_len = cpu_to_le32(info->dma_len); + while (1) { + *seq_ptr = cpu_to_le16(seq); + rc = hwrm_req_send(bp, msg); + if (rc) + break; + + len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off))); + if (!seq && + cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + info->segs = le16_to_cpu(*((__le16 *)(resp + + segs_off))); + if (!info->segs) { + rc = -EIO; + break; + } + + info->dest_buf_size = info->segs * + sizeof(struct coredump_segment_record); + info->dest_buf = kmalloc(info->dest_buf_size, + GFP_KERNEL); + if (!info->dest_buf) { + rc = -ENOMEM; + break; + } + } + + if (info->dest_buf) { + if ((info->seg_start + off + len) <= + BNXT_COREDUMP_BUF_LEN(info->buf_len)) { + memcpy(info->dest_buf + off, dma_buf, len); + } else { + rc = -ENOBUFS; + break; + } + } + + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + + if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) + break; + + seq++; + off += len; + } + hwrm_req_drop(bp, msg); + return rc; +} + +static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp, + struct bnxt_coredump *coredump) +{ + struct 
bnxt_hwrm_dbg_dma_info info = {NULL}; + struct hwrm_dbg_coredump_list_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST); + if (rc) + return rc; + + info.dma_len = COREDUMP_LIST_BUF_LEN; + info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no); + info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output, + data_len); + + rc = bnxt_hwrm_dbg_dma_data(bp, req, &info); + if (!rc) { + coredump->data = info.dest_buf; + coredump->data_size = info.dest_buf_size; + coredump->total_segs = info.segs; + } + return rc; +} + +static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id, + u16 segment_id) +{ + struct hwrm_dbg_coredump_initiate_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE); + if (rc) + return rc; + + hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT); + req->component_id = cpu_to_le16(component_id); + req->segment_id = cpu_to_le16(segment_id); + + return hwrm_req_send(bp, req); +} + +static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id, + u16 segment_id, u32 *seg_len, + void *buf, u32 buf_len, u32 offset) +{ + struct hwrm_dbg_coredump_retrieve_input *req; + struct bnxt_hwrm_dbg_dma_info info = {NULL}; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE); + if (rc) + return rc; + + req->component_id = cpu_to_le16(component_id); + req->segment_id = cpu_to_le16(segment_id); + + info.dma_len = COREDUMP_RETRIEVE_BUF_LEN; + info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input, + seq_no); + info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output, + data_len); + if (buf) { + info.dest_buf = buf + offset; + info.buf_len = buf_len; + info.seg_start = offset; + } + + rc = bnxt_hwrm_dbg_dma_data(bp, req, &info); + if (!rc) + *seg_len = info.dest_buf_size; + + return rc; +} + +static void +bnxt_fill_coredump_seg_hdr(struct bnxt *bp, + struct bnxt_coredump_segment_hdr *seg_hdr, + struct coredump_segment_record 
*seg_rec, u32 seg_len, + int status, u32 duration, u32 instance) +{ + memset(seg_hdr, 0, sizeof(*seg_hdr)); + memcpy(seg_hdr->signature, "sEgM", 4); + if (seg_rec) { + seg_hdr->component_id = (__force __le32)seg_rec->component_id; + seg_hdr->segment_id = (__force __le32)seg_rec->segment_id; + seg_hdr->low_version = seg_rec->version_low; + seg_hdr->high_version = seg_rec->version_hi; + } else { + /* For hwrm_ver_get response Component id = 2 + * and Segment id = 0 + */ + seg_hdr->component_id = cpu_to_le32(2); + seg_hdr->segment_id = 0; + } + seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn); + seg_hdr->length = cpu_to_le32(seg_len); + seg_hdr->status = cpu_to_le32(status); + seg_hdr->duration = cpu_to_le32(duration); + seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr)); + seg_hdr->instance = cpu_to_le32(instance); +} + +static void +bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, + time64_t start, s16 start_utc, u16 total_segs, + int status) +{ + time64_t end = ktime_get_real_seconds(); + u32 os_ver_major = 0, os_ver_minor = 0; + struct tm tm; + + time64_to_tm(start, 0, &tm); + memset(record, 0, sizeof(*record)); + memcpy(record->signature, "cOrE", 4); + record->flags = 0; + record->low_version = 0; + record->high_version = 1; + record->asic_state = 0; + strscpy(record->system_name, utsname()->nodename, + sizeof(record->system_name)); + record->year = cpu_to_le16(tm.tm_year + 1900); + record->month = cpu_to_le16(tm.tm_mon + 1); + record->day = cpu_to_le16(tm.tm_mday); + record->hour = cpu_to_le16(tm.tm_hour); + record->minute = cpu_to_le16(tm.tm_min); + record->second = cpu_to_le16(tm.tm_sec); + record->utc_bias = cpu_to_le16(start_utc); + strcpy(record->commandline, "ethtool -w"); + record->total_segments = cpu_to_le32(total_segs); + + if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2) + netdev_warn(bp->dev, "Unknown OS release in coredump\n"); + record->os_ver_major = cpu_to_le32(os_ver_major); + 
record->os_ver_minor = cpu_to_le32(os_ver_minor); + + strscpy(record->os_name, utsname()->sysname, sizeof(record->os_name)); + time64_to_tm(end, 0, &tm); + record->end_year = cpu_to_le16(tm.tm_year + 1900); + record->end_month = cpu_to_le16(tm.tm_mon + 1); + record->end_day = cpu_to_le16(tm.tm_mday); + record->end_hour = cpu_to_le16(tm.tm_hour); + record->end_minute = cpu_to_le16(tm.tm_min); + record->end_second = cpu_to_le16(tm.tm_sec); + record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60); + record->asic_id1 = cpu_to_le32(bp->chip_num << 16 | + bp->ver_resp.chip_rev << 8 | + bp->ver_resp.chip_metal); + record->asic_id2 = 0; + record->coredump_status = cpu_to_le32(status); + record->ioctl_low_version = 0; + record->ioctl_high_version = 0; +} + +static int __bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len) +{ + u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output); + u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0; + struct coredump_segment_record *seg_record = NULL; + struct bnxt_coredump_segment_hdr seg_hdr; + struct bnxt_coredump coredump = {NULL}; + time64_t start_time; + u16 start_utc; + int rc = 0, i; + + if (buf) + buf_len = *dump_len; + + start_time = ktime_get_real_seconds(); + start_utc = sys_tz.tz_minuteswest * 60; + seg_hdr_len = sizeof(seg_hdr); + + /* First segment should be hwrm_ver_get response */ + *dump_len = seg_hdr_len + ver_get_resp_len; + if (buf) { + bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len, + 0, 0, 0); + memcpy(buf + offset, &seg_hdr, seg_hdr_len); + offset += seg_hdr_len; + memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len); + offset += ver_get_resp_len; + } + + rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump); + if (rc) { + netdev_err(bp->dev, "Failed to get coredump segment list\n"); + goto err; + } + + *dump_len += seg_hdr_len * coredump.total_segs; + + seg_record = (struct coredump_segment_record *)coredump.data; + seg_record_len = sizeof(*seg_record); + + for (i = 0; i < 
coredump.total_segs; i++) { + u16 comp_id = le16_to_cpu(seg_record->component_id); + u16 seg_id = le16_to_cpu(seg_record->segment_id); + u32 duration = 0, seg_len = 0; + unsigned long start, end; + + if (buf && ((offset + seg_hdr_len) > + BNXT_COREDUMP_BUF_LEN(buf_len))) { + rc = -ENOBUFS; + goto err; + } + + start = jiffies; + + rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id); + if (rc) { + netdev_err(bp->dev, + "Failed to initiate coredump for seg = %d\n", + seg_record->segment_id); + goto next_seg; + } + + /* Write segment data into the buffer */ + rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id, + &seg_len, buf, buf_len, + offset + seg_hdr_len); + if (rc && rc == -ENOBUFS) + goto err; + else if (rc) + netdev_err(bp->dev, + "Failed to retrieve coredump for seg = %d\n", + seg_record->segment_id); + +next_seg: + end = jiffies; + duration = jiffies_to_msecs(end - start); + bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len, + rc, duration, 0); + + if (buf) { + /* Write segment header into the buffer */ + memcpy(buf + offset, &seg_hdr, seg_hdr_len); + offset += seg_hdr_len + seg_len; + } + + *dump_len += seg_len; + seg_record = + (struct coredump_segment_record *)((u8 *)seg_record + + seg_record_len); + } + +err: + if (buf) + bnxt_fill_coredump_record(bp, buf + offset, start_time, + start_utc, coredump.total_segs + 1, + rc); + kfree(coredump.data); + *dump_len += sizeof(struct bnxt_coredump_record); + if (rc == -ENOBUFS) + netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); + return rc; +} + +int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len) +{ + if (dump_type == BNXT_DUMP_CRASH) { +#ifdef CONFIG_TEE_BNXT_FW + return tee_bnxt_copy_coredump(buf, 0, *dump_len); +#else + return -EOPNOTSUPP; +#endif + } else { + return __bnxt_get_coredump(bp, buf, dump_len); + } +} + +u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type) +{ + u32 len = 0; + + if (dump_type == BNXT_DUMP_CRASH) + len = 
BNXT_CRASH_DUMP_LEN; + else + __bnxt_get_coredump(bp, NULL, &len); + return len; +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h index 09c22f8fe399..b1a1b2fffb19 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h @@ -10,6 +10,10 @@ #ifndef BNXT_COREDUMP_H #define BNXT_COREDUMP_H +#include +#include +#include + struct bnxt_coredump_segment_hdr { __u8 signature[4]; __le32 component_id; @@ -63,4 +67,51 @@ struct bnxt_coredump_record { __u8 ioctl_high_version; __le16 rsvd3[313]; }; + +#define BNXT_CRASH_DUMP_LEN (8 << 20) + +#define COREDUMP_LIST_BUF_LEN 2048 +#define COREDUMP_RETRIEVE_BUF_LEN 4096 + +struct bnxt_coredump { + void *data; + int data_size; + u16 total_segs; +}; + +#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record)) + +struct bnxt_hwrm_dbg_dma_info { + void *dest_buf; + int dest_buf_size; + u16 dma_len; + u16 seq_off; + u16 data_len_off; + u16 segs; + u32 seg_start; + u32 buf_len; +}; + +struct hwrm_dbg_cmn_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 host_buf_len; +}; + +struct hwrm_dbg_cmn_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; + #define HWRM_DBG_CMN_FLAGS_MORE 1 +}; + +int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len); +u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type); + #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fe832f97f905..bb3f3529987b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3614,362 +3614,6 @@ static int bnxt_reset(struct net_device *dev, u32 *flags) return 0; } -static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, - 
struct bnxt_hwrm_dbg_dma_info *info) -{ - struct hwrm_dbg_cmn_input *cmn_req = msg; - __le16 *seq_ptr = msg + info->seq_off; - struct hwrm_dbg_cmn_output *cmn_resp; - u16 seq = 0, len, segs_off; - dma_addr_t dma_handle; - void *dma_buf, *resp; - int rc, off = 0; - - dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle); - if (!dma_buf) { - hwrm_req_drop(bp, msg); - return -ENOMEM; - } - - hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT); - cmn_resp = hwrm_req_hold(bp, msg); - resp = cmn_resp; - - segs_off = offsetof(struct hwrm_dbg_coredump_list_output, - total_segments); - cmn_req->host_dest_addr = cpu_to_le64(dma_handle); - cmn_req->host_buf_len = cpu_to_le32(info->dma_len); - while (1) { - *seq_ptr = cpu_to_le16(seq); - rc = hwrm_req_send(bp, msg); - if (rc) - break; - - len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off))); - if (!seq && - cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { - info->segs = le16_to_cpu(*((__le16 *)(resp + - segs_off))); - if (!info->segs) { - rc = -EIO; - break; - } - - info->dest_buf_size = info->segs * - sizeof(struct coredump_segment_record); - info->dest_buf = kmalloc(info->dest_buf_size, - GFP_KERNEL); - if (!info->dest_buf) { - rc = -ENOMEM; - break; - } - } - - if (info->dest_buf) { - if ((info->seg_start + off + len) <= - BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); - } else { - rc = -ENOBUFS; - break; - } - } - - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) - break; - - seq++; - off += len; - } - hwrm_req_drop(bp, msg); - return rc; -} - -static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp, - struct bnxt_coredump *coredump) -{ - struct bnxt_hwrm_dbg_dma_info info = {NULL}; - struct hwrm_dbg_coredump_list_input *req; - int rc; - - rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST); - if (rc) - return rc; - - info.dma_len = COREDUMP_LIST_BUF_LEN; - 
info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no); - info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output, - data_len); - - rc = bnxt_hwrm_dbg_dma_data(bp, req, &info); - if (!rc) { - coredump->data = info.dest_buf; - coredump->data_size = info.dest_buf_size; - coredump->total_segs = info.segs; - } - return rc; -} - -static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id, - u16 segment_id) -{ - struct hwrm_dbg_coredump_initiate_input *req; - int rc; - - rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE); - if (rc) - return rc; - - hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT); - req->component_id = cpu_to_le16(component_id); - req->segment_id = cpu_to_le16(segment_id); - - return hwrm_req_send(bp, req); -} - -static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id, - u16 segment_id, u32 *seg_len, - void *buf, u32 buf_len, u32 offset) -{ - struct hwrm_dbg_coredump_retrieve_input *req; - struct bnxt_hwrm_dbg_dma_info info = {NULL}; - int rc; - - rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE); - if (rc) - return rc; - - req->component_id = cpu_to_le16(component_id); - req->segment_id = cpu_to_le16(segment_id); - - info.dma_len = COREDUMP_RETRIEVE_BUF_LEN; - info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input, - seq_no); - info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output, - data_len); - if (buf) { - info.dest_buf = buf + offset; - info.buf_len = buf_len; - info.seg_start = offset; - } - - rc = bnxt_hwrm_dbg_dma_data(bp, req, &info); - if (!rc) - *seg_len = info.dest_buf_size; - - return rc; -} - -static void -bnxt_fill_coredump_seg_hdr(struct bnxt *bp, - struct bnxt_coredump_segment_hdr *seg_hdr, - struct coredump_segment_record *seg_rec, u32 seg_len, - int status, u32 duration, u32 instance) -{ - memset(seg_hdr, 0, sizeof(*seg_hdr)); - memcpy(seg_hdr->signature, "sEgM", 4); - if (seg_rec) { - seg_hdr->component_id = (__force 
__le32)seg_rec->component_id; - seg_hdr->segment_id = (__force __le32)seg_rec->segment_id; - seg_hdr->low_version = seg_rec->version_low; - seg_hdr->high_version = seg_rec->version_hi; - } else { - /* For hwrm_ver_get response Component id = 2 - * and Segment id = 0 - */ - seg_hdr->component_id = cpu_to_le32(2); - seg_hdr->segment_id = 0; - } - seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn); - seg_hdr->length = cpu_to_le32(seg_len); - seg_hdr->status = cpu_to_le32(status); - seg_hdr->duration = cpu_to_le32(duration); - seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr)); - seg_hdr->instance = cpu_to_le32(instance); -} - -static void -bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, - time64_t start, s16 start_utc, u16 total_segs, - int status) -{ - time64_t end = ktime_get_real_seconds(); - u32 os_ver_major = 0, os_ver_minor = 0; - struct tm tm; - - time64_to_tm(start, 0, &tm); - memset(record, 0, sizeof(*record)); - memcpy(record->signature, "cOrE", 4); - record->flags = 0; - record->low_version = 0; - record->high_version = 1; - record->asic_state = 0; - strscpy(record->system_name, utsname()->nodename, - sizeof(record->system_name)); - record->year = cpu_to_le16(tm.tm_year + 1900); - record->month = cpu_to_le16(tm.tm_mon + 1); - record->day = cpu_to_le16(tm.tm_mday); - record->hour = cpu_to_le16(tm.tm_hour); - record->minute = cpu_to_le16(tm.tm_min); - record->second = cpu_to_le16(tm.tm_sec); - record->utc_bias = cpu_to_le16(start_utc); - strcpy(record->commandline, "ethtool -w"); - record->total_segments = cpu_to_le32(total_segs); - - if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2) - netdev_warn(bp->dev, "Unknown OS release in coredump\n"); - record->os_ver_major = cpu_to_le32(os_ver_major); - record->os_ver_minor = cpu_to_le32(os_ver_minor); - - strscpy(record->os_name, utsname()->sysname, sizeof(record->os_name)); - time64_to_tm(end, 0, &tm); - record->end_year = cpu_to_le16(tm.tm_year + 1900); 
- record->end_month = cpu_to_le16(tm.tm_mon + 1); - record->end_day = cpu_to_le16(tm.tm_mday); - record->end_hour = cpu_to_le16(tm.tm_hour); - record->end_minute = cpu_to_le16(tm.tm_min); - record->end_second = cpu_to_le16(tm.tm_sec); - record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60); - record->asic_id1 = cpu_to_le32(bp->chip_num << 16 | - bp->ver_resp.chip_rev << 8 | - bp->ver_resp.chip_metal); - record->asic_id2 = 0; - record->coredump_status = cpu_to_le32(status); - record->ioctl_low_version = 0; - record->ioctl_high_version = 0; -} - -static int __bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len) -{ - u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output); - u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0; - struct coredump_segment_record *seg_record = NULL; - struct bnxt_coredump_segment_hdr seg_hdr; - struct bnxt_coredump coredump = {NULL}; - time64_t start_time; - u16 start_utc; - int rc = 0, i; - - if (buf) - buf_len = *dump_len; - - start_time = ktime_get_real_seconds(); - start_utc = sys_tz.tz_minuteswest * 60; - seg_hdr_len = sizeof(seg_hdr); - - /* First segment should be hwrm_ver_get response */ - *dump_len = seg_hdr_len + ver_get_resp_len; - if (buf) { - bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len, - 0, 0, 0); - memcpy(buf + offset, &seg_hdr, seg_hdr_len); - offset += seg_hdr_len; - memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len); - offset += ver_get_resp_len; - } - - rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump); - if (rc) { - netdev_err(bp->dev, "Failed to get coredump segment list\n"); - goto err; - } - - *dump_len += seg_hdr_len * coredump.total_segs; - - seg_record = (struct coredump_segment_record *)coredump.data; - seg_record_len = sizeof(*seg_record); - - for (i = 0; i < coredump.total_segs; i++) { - u16 comp_id = le16_to_cpu(seg_record->component_id); - u16 seg_id = le16_to_cpu(seg_record->segment_id); - u32 duration = 0, seg_len = 0; - unsigned long start, end; - - if (buf && 
((offset + seg_hdr_len) > - BNXT_COREDUMP_BUF_LEN(buf_len))) { - rc = -ENOBUFS; - goto err; - } - - start = jiffies; - - rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id); - if (rc) { - netdev_err(bp->dev, - "Failed to initiate coredump for seg = %d\n", - seg_record->segment_id); - goto next_seg; - } - - /* Write segment data into the buffer */ - rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id, - &seg_len, buf, buf_len, - offset + seg_hdr_len); - if (rc && rc == -ENOBUFS) - goto err; - else if (rc) - netdev_err(bp->dev, - "Failed to retrieve coredump for seg = %d\n", - seg_record->segment_id); - -next_seg: - end = jiffies; - duration = jiffies_to_msecs(end - start); - bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len, - rc, duration, 0); - - if (buf) { - /* Write segment header into the buffer */ - memcpy(buf + offset, &seg_hdr, seg_hdr_len); - offset += seg_hdr_len + seg_len; - } - - *dump_len += seg_len; - seg_record = - (struct coredump_segment_record *)((u8 *)seg_record + - seg_record_len); - } - -err: - if (buf) - bnxt_fill_coredump_record(bp, buf + offset, start_time, - start_utc, coredump.total_segs + 1, - rc); - kfree(coredump.data); - *dump_len += sizeof(struct bnxt_coredump_record); - if (rc == -ENOBUFS) - netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); - return rc; -} - -static int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len) -{ - if (dump_type == BNXT_DUMP_CRASH) { -#ifdef CONFIG_TEE_BNXT_FW - return tee_bnxt_copy_coredump(buf, 0, *dump_len); -#else - return -EOPNOTSUPP; -#endif - } else { - return __bnxt_get_coredump(bp, buf, dump_len); - } -} - -static u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type) -{ - u32 len = 0; - - if (dump_type == BNXT_DUMP_CRASH) - len = BNXT_CRASH_DUMP_LEN; - else - __bnxt_get_coredump(bp, NULL, &len); - return len; -} - static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump) { struct bnxt *bp = netdev_priv(dev); diff 
--git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index bbf184c63b0a..4f7eaba65dcb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -22,49 +22,6 @@ struct bnxt_led_cfg { u8 rsvd; }; -#define COREDUMP_LIST_BUF_LEN 2048 -#define COREDUMP_RETRIEVE_BUF_LEN 4096 - -struct bnxt_coredump { - void *data; - int data_size; - u16 total_segs; -}; - -#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record)) - -struct bnxt_hwrm_dbg_dma_info { - void *dest_buf; - int dest_buf_size; - u16 dma_len; - u16 seq_off; - u16 data_len_off; - u16 segs; - u32 seg_start; - u32 buf_len; -}; - -struct hwrm_dbg_cmn_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le64 host_dest_addr; - __le32 host_buf_len; -}; - -struct hwrm_dbg_cmn_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - u8 flags; - #define HWRM_DBG_CMN_FLAGS_MORE 1 -}; - -#define BNXT_CRASH_DUMP_LEN (8 << 20) - #define BNXT_LED_DFLT_ENA \ (PORT_LED_CFG_REQ_ENABLES_LED0_ID | \ PORT_LED_CFG_REQ_ENABLES_LED0_STATE | \ From 80f62ba9d53d40e7a71b79543026e8e20afe4ec1 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 29 Oct 2021 03:47:49 -0400 Subject: [PATCH 29/38] bnxt_en: Add compression flags information in coredump segment header Firmware sets compression flags for each segment, add this information while filling segment header. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 3e23fce3771e..05896bf9750d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -178,6 +178,7 @@ bnxt_fill_coredump_seg_hdr(struct bnxt *bp, seg_hdr->segment_id = (__force __le32)seg_rec->segment_id; seg_hdr->low_version = seg_rec->version_low; seg_hdr->high_version = seg_rec->version_hi; + seg_hdr->flags = cpu_to_le32(seg_rec->compress_flags); } else { /* For hwrm_ver_get response Component id = 2 * and Segment id = 0 From 80194db9f53bc8877468f96734133b7a8d28aa4c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 29 Oct 2021 03:47:50 -0400 Subject: [PATCH 30/38] bnxt_en: Retrieve coredump and crashdump size via FW command Recent firmware provides coredump and crashdump size info via DBG_QCFG command. Read the dump sizes from firmware, instead of computing in the driver. This patch reduces the time taken to collect the dump via ethtool. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 + .../ethernet/broadcom/bnxt/bnxt_coredump.c | 55 +++++++++++++++++-- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b4d9374548f8..a5d4dbee7683 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7480,6 +7480,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD; if (!(flags & FUNC_QCAPS_RESP_FLAGS_VLAN_ACCELERATION_TX_DISABLED)) bp->fw_cap |= BNXT_FW_CAP_VLAN_TX_INSERT; + if (flags & FUNC_QCAPS_RESP_FLAGS_DBG_QCAPS_CMD_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_DBG_QCAPS; flags_ext = le32_to_cpu(resp->flags_ext); if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED) @@ -7543,6 +7545,32 @@ hwrm_func_qcaps_exit: return rc; } +static void bnxt_hwrm_dbg_qcaps(struct bnxt *bp) +{ + struct hwrm_dbg_qcaps_output *resp; + struct hwrm_dbg_qcaps_input *req; + int rc; + + bp->fw_dbg_cap = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_DBG_QCAPS)) + return; + + rc = hwrm_req_init(bp, req, HWRM_DBG_QCAPS); + if (rc) + return; + + req->fid = cpu_to_le16(0xffff); + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send(bp, req); + if (rc) + goto hwrm_dbg_qcaps_exit; + + bp->fw_dbg_cap = le32_to_cpu(resp->flags); + +hwrm_dbg_qcaps_exit: + hwrm_req_drop(bp, req); +} + static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp); static int bnxt_hwrm_func_qcaps(struct bnxt *bp) @@ -7552,6 +7580,9 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) rc = __bnxt_hwrm_func_qcaps(bp); if (rc) return rc; + + bnxt_hwrm_dbg_qcaps(bp); + rc = bnxt_hwrm_queue_qportcfg(bp); if (rc) { netdev_err(bp->dev, "hwrm query qportcfg failure rc: %d\n", rc); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bbbc63e882d1..4165fffec886 100644 
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1961,6 +1961,9 @@ struct bnxt { #define BNXT_FW_CAP_PTP_PPS 0x10000000 #define BNXT_FW_CAP_HOT_RESET_IF 0x20000000 #define BNXT_FW_CAP_RING_MONITOR 0x40000000 + #define BNXT_FW_CAP_DBG_QCAPS 0x80000000 + + u32 fw_dbg_cap; #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) u32 hwrm_spec_code; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 05896bf9750d..8961a6ffae87 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -361,13 +361,60 @@ int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len) } } +static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) +{ + struct hwrm_dbg_qcfg_output *resp; + struct hwrm_dbg_qcfg_input *req; + int rc, hdr_len = 0; + + if (!(bp->fw_cap & BNXT_FW_CAP_DBG_QCAPS)) + return -EOPNOTSUPP; + + if (dump_type == BNXT_DUMP_CRASH && + !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR)) + return -EOPNOTSUPP; + + rc = hwrm_req_init(bp, req, HWRM_DBG_QCFG); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + if (dump_type == BNXT_DUMP_CRASH) + req->flags = cpu_to_le16(DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR); + + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send(bp, req); + if (rc) + goto get_dump_len_exit; + + if (dump_type == BNXT_DUMP_CRASH) { + *dump_len = le32_to_cpu(resp->crashdump_size); + } else { + /* Driver adds coredump header and "HWRM_VER_GET response" + * segment additionally to coredump. 
+ */ + hdr_len = sizeof(struct bnxt_coredump_segment_hdr) + + sizeof(struct hwrm_ver_get_output) + + sizeof(struct bnxt_coredump_record); + *dump_len = le32_to_cpu(resp->coredump_size) + hdr_len; + } + if (*dump_len <= hdr_len) + rc = -EINVAL; + +get_dump_len_exit: + hwrm_req_drop(bp, req); + return rc; +} + u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type) { u32 len = 0; - if (dump_type == BNXT_DUMP_CRASH) - len = BNXT_CRASH_DUMP_LEN; - else - __bnxt_get_coredump(bp, NULL, &len); + if (bnxt_hwrm_get_dump_len(bp, dump_type, &len)) { + if (dump_type == BNXT_DUMP_CRASH) + len = BNXT_CRASH_DUMP_LEN; + else + __bnxt_get_coredump(bp, NULL, &len); + } return len; } From 4e59f0600790cc205192203570a677375671d1d7 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:51 -0400 Subject: [PATCH 31/38] bnxt_en: extract coredump command line from current task Tools other than 'ethtool -w' may be used to produce a coredump. For devlink health, such dumps could even be driver initiated in response to a health event. In these cases, the kernel thread information will be placed in the coredump record instead. v2: use min_t() instead of min() to fix the mismatched type warning Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- .../ethernet/broadcom/bnxt/bnxt_coredump.c | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 8961a6ffae87..d3cb2f21946d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -194,6 +194,30 @@ bnxt_fill_coredump_seg_hdr(struct bnxt *bp, seg_hdr->instance = cpu_to_le32(instance); } +static void bnxt_fill_cmdline(struct bnxt_coredump_record *record) +{ + struct mm_struct *mm = current->mm; + int i, len, last = 0; + + if (mm) { + len = min_t(int, mm->arg_end - mm->arg_start, + sizeof(record->commandline) - 1); + if (len && !copy_from_user(record->commandline, + (char __user *)mm->arg_start, len)) { + for (i = 0; i < len; i++) { + if (record->commandline[i]) + last = i; + else + record->commandline[i] = ' '; + } + record->commandline[last + 1] = 0; + return; + } + } + + strscpy(record->commandline, current->comm, TASK_COMM_LEN); +} + static void bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, time64_t start, s16 start_utc, u16 total_segs, @@ -219,7 +243,7 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, record->minute = cpu_to_le16(tm.tm_min); record->second = cpu_to_le16(tm.tm_sec); record->utc_bias = cpu_to_le16(start_utc); - strcpy(record->commandline, "ethtool -w"); + bnxt_fill_cmdline(record); record->total_segments = cpu_to_le32(total_segs); if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2) From 188876db04a3524aa81ced7475686e7c44ca1a5e Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:52 -0400 Subject: [PATCH 32/38] bnxt_en: implement dump callback for fw health reporter Populate the dump with firmware 'live' coredump data. This includes the information stored in NVRAM by the firmware exception handler prior to recovery. 
Thus, the live dump includes the desired crash context. Firmware does not support HWRM calls after RESET_NOTIFY, so there is no supported way to capture a coredump during the auto dump phase. Detect this and abort when called from devlink_health_report(). Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 930cbf1ca4e0..106f4249e47b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -18,6 +18,7 @@ #include "bnxt_ethtool.h" #include "bnxt_ulp.h" #include "bnxt_ptp.h" +#include "bnxt_coredump.h" static void __bnxt_fw_recover(struct bnxt *bp) { @@ -177,6 +178,46 @@ unlock: return devlink_fmsg_u32_pair_put(fmsg, "Diagnoses", h->diagnoses); } +static int bnxt_fw_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = devlink_health_reporter_priv(reporter); + u32 dump_len; + void *data; + int rc; + + /* TODO: no firmware dump support in devlink_health_report() context */ + if (priv_ctx) + return -EOPNOTSUPP; + + dump_len = bnxt_get_coredump_length(bp, BNXT_DUMP_LIVE); + if (!dump_len) + return -EIO; + + data = vmalloc(dump_len); + if (!data) + return -ENOMEM; + + rc = bnxt_get_coredump(bp, BNXT_DUMP_LIVE, data, &dump_len); + if (!rc) { + rc = devlink_fmsg_pair_nest_start(fmsg, "core"); + if (rc) + goto exit; + rc = devlink_fmsg_binary_pair_put(fmsg, "data", data, dump_len); + if (rc) + goto exit; + rc = devlink_fmsg_u32_pair_put(fmsg, "size", dump_len); + if (rc) + goto exit; + rc = devlink_fmsg_pair_nest_end(fmsg); + } + +exit: + vfree(data); + return rc; +} + static int bnxt_fw_recover(struct devlink_health_reporter *reporter, void *priv_ctx, struct 
netlink_ext_ack *extack) @@ -195,6 +236,7 @@ static int bnxt_fw_recover(struct devlink_health_reporter *reporter, static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { .name = "fw", .diagnose = bnxt_fw_diagnose, + .dump = bnxt_fw_dump, .recover = bnxt_fw_recover, }; From 21e70778d0d4e677bf4b1882a3280cd05c80d559 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 29 Oct 2021 03:47:53 -0400 Subject: [PATCH 33/38] bnxt_en: Update firmware interface to 1.10.2.63 The main changes are firmware live patch support and 2 additional FEC standard counters. Add the matching FEC counters to ethtool counter array. Firmware older than 220 does not return the proper size of the extended RX counters so we need to cap it at the smaller legacy size. Otherwise the new FEC counters may show up with garbage values. Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 + .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 155 +++++++++++++++++- 4 files changed, 156 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a5d4dbee7683..0e5bab75c64c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8210,6 +8210,10 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags) if (!rc) { bp->fw_rx_stats_ext_size = le16_to_cpu(resp_qs->rx_stat_size) / 8; + if (BNXT_FW_MAJ(bp) < 220 && + bp->fw_rx_stats_ext_size > BNXT_RX_STATS_EXT_NUM_LEGACY) + bp->fw_rx_stats_ext_size = BNXT_RX_STATS_EXT_NUM_LEGACY; + bp->fw_tx_stats_ext_size = tx_stat_size ? 
le16_to_cpu(resp_qs->tx_stat_size) / 8 : 0; } else { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4165fffec886..4fecfdb430b3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2174,6 +2174,9 @@ struct bnxt { #define BNXT_RX_STATS_EXT_OFFSET(counter) \ (offsetof(struct rx_port_stats_ext, counter) / 8) +#define BNXT_RX_STATS_EXT_NUM_LEGACY \ + BNXT_RX_STATS_EXT_OFFSET(rx_fec_corrected_blocks) + #define BNXT_TX_STATS_EXT_OFFSET(counter) \ (offsetof(struct tx_port_stats_ext, counter) / 8) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index bb3f3529987b..334ada053246 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -427,6 +427,8 @@ static const struct { BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err), BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits), BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES, + BNXT_RX_STATS_EXT_ENTRY(rx_fec_corrected_blocks), + BNXT_RX_STATS_EXT_ENTRY(rx_fec_uncorrectable_blocks), }; static const struct { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 94d07a9f7034..ea86c54247c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -532,8 +532,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 2 -#define HWRM_VERSION_RSVD 52 -#define HWRM_VERSION_STR "1.10.2.52" +#define HWRM_VERSION_RSVD 63 +#define HWRM_VERSION_STR "1.10.2.63" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -1587,6 +1587,8 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED 0x200000UL #define FUNC_QCAPS_RESP_FLAGS_EXT_KTLS_SUPPORTED 0x400000UL #define 
FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -1956,6 +1958,18 @@ struct hwrm_func_cfg_output { u8 valid; }; +/* hwrm_func_cfg_cmd_err (size:64b/8B) */ +struct hwrm_func_cfg_cmd_err { + u8 code; + #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + u8 unused_0[7]; +}; + /* hwrm_func_qstats_input (size:192b/24B) */ struct hwrm_func_qstats_input { __le16 req_type; @@ -3601,7 +3615,15 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR4 0x1dUL #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR4 0x1eUL #define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4 0x1fUL - #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4 + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASECR 0x20UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASESR 0x21UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASELR 0x22UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASEER 0x23UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR2 0x24UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR2 0x25UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR2 0x26UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2 0x27UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2 u8 media_type; #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL @@ -4040,7 +4062,7 @@ struct tx_port_stats_ext { __le64 pfc_pri7_tx_transitions; }; -/* rx_port_stats_ext (size:3648b/456B) 
*/ +/* rx_port_stats_ext (size:3776b/472B) */ struct rx_port_stats_ext { __le64 link_down_events; __le64 continuous_pause_events; @@ -4099,6 +4121,8 @@ struct rx_port_stats_ext { __le64 rx_discard_packets_cos5; __le64 rx_discard_packets_cos6; __le64 rx_discard_packets_cos7; + __le64 rx_fec_corrected_blocks; + __le64 rx_fec_uncorrectable_blocks; }; /* hwrm_port_qstats_ext_input (size:320b/40B) */ @@ -4372,7 +4396,10 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_50G 0x1UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G 0x2UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G 0x4UL - u8 unused_0[3]; + __le16 flags2; + #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL + u8 unused_0[1]; u8 valid; }; @@ -6076,6 +6103,11 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP 0x800UL #define VNIC_QCAPS_RESP_FLAGS_METADATA_FORMAT_CAP 0x1000UL #define VNIC_QCAPS_RESP_FLAGS_RSS_STRICT_HASH_TYPE_CAP 0x2000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP 0x4000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_TOEPLITZ_CAP 0x8000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_XOR_CAP 0x10000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_HASH_FUNCTION_CHKSM_CAP 0x20000UL + #define VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP 0x40000UL __le16 max_aggs_supported; u8 unused_1[5]; u8 valid; @@ -6206,7 +6238,15 @@ struct hwrm_vnic_rss_cfg_input { __le64 ring_grp_tbl_addr; __le64 hash_key_tbl_addr; __le16 rss_ctx_idx; - u8 unused_1[6]; + u8 flags; + #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_INCLUDE 0x1UL + #define VNIC_RSS_CFG_REQ_FLAGS_HASH_TYPE_EXCLUDE 0x2UL + u8 rss_hash_function; + #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_TOEPLITZ 0x0UL + #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_XOR 0x1UL + #define VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_CHECKSUM 0x2UL + #define 
VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_LAST VNIC_RSS_CFG_REQ_RSS_HASH_FUNCTION_CHECKSUM + u8 unused_1[4]; }; /* hwrm_vnic_rss_cfg_output (size:128b/16B) */ @@ -6331,7 +6371,24 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_RING_TYPE_RX_AGG 0x4UL #define RING_ALLOC_REQ_RING_TYPE_NQ 0x5UL #define RING_ALLOC_REQ_RING_TYPE_LAST RING_ALLOC_REQ_RING_TYPE_NQ - u8 unused_0; + u8 cmpl_coal_cnt; + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_OFF 0x0UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_4 0x1UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_8 0x2UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_12 0x3UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_16 0x4UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_24 0x5UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_32 0x6UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_48 0x7UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64 0x8UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_96 0x9UL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_128 0xaUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_192 0xbUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_256 0xcUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_320 0xdUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_384 0xeUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX 0xfUL + #define RING_ALLOC_REQ_CMPL_COAL_CNT_LAST RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_MAX __le16 flags; #define RING_ALLOC_REQ_FLAGS_RX_SOP_PAD 0x1UL __le64 page_tbl_addr; @@ -7099,6 +7156,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input { #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_FID 0x8UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_ARP_REPLY 0x10UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX 0x20UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_NO_L2_CONTEXT 0x40UL __le32 enables; #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID 0x1UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x2UL @@ -7234,6 +7292,7 @@ struct hwrm_cfa_ntuple_filter_cfg_input { __le32 flags; #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_FID 0x1UL 
#define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_RFS_RING_IDX 0x2UL + #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_NO_L2_CONTEXT 0x4UL __le64 ntuple_filter_id; __le32 new_dst_id; __le32 new_mirror_vnic_id; @@ -7834,11 +7893,11 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE 0x8000UL #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED 0x10000UL #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_LAG_SUPPORTED 0x20000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_NO_L2CTX_SUPPORTED 0x40000UL u8 unused_0[3]; u8 valid; }; -/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */ struct hwrm_tunnel_dst_port_query_input { __le16 req_type; __le16 cmpl_ring; @@ -8414,6 +8473,86 @@ struct hwrm_fw_get_structured_data_cmd_err { u8 unused_0[7]; }; +/* hwrm_fw_livepatch_query_input (size:192b/24B) */ +struct hwrm_fw_livepatch_query_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 fw_target; + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_COMMON_FW 0x1UL + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW 0x2UL + #define FW_LIVEPATCH_QUERY_REQ_FW_TARGET_LAST FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW + u8 unused_0[7]; +}; + +/* hwrm_fw_livepatch_query_output (size:640b/80B) */ +struct hwrm_fw_livepatch_query_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + char install_ver[32]; + char active_ver[32]; + __le16 status_flags; + #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL 0x1UL + #define FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE 0x2UL + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_fw_livepatch_input (size:256b/32B) */ +struct hwrm_fw_livepatch_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 opcode; + #define FW_LIVEPATCH_REQ_OPCODE_ACTIVATE 0x1UL + #define FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE 0x2UL + #define FW_LIVEPATCH_REQ_OPCODE_LAST 
FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE + u8 fw_target; + #define FW_LIVEPATCH_REQ_FW_TARGET_COMMON_FW 0x1UL + #define FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW 0x2UL + #define FW_LIVEPATCH_REQ_FW_TARGET_LAST FW_LIVEPATCH_REQ_FW_TARGET_SECURE_FW + u8 loadtype; + #define FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL 0x1UL + #define FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT 0x2UL + #define FW_LIVEPATCH_REQ_LOADTYPE_LAST FW_LIVEPATCH_REQ_LOADTYPE_MEMORY_DIRECT + u8 flags; + __le32 patch_len; + __le64 host_addr; +}; + +/* hwrm_fw_livepatch_output (size:128b/16B) */ +struct hwrm_fw_livepatch_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_fw_livepatch_cmd_err (size:64b/8B) */ +struct hwrm_fw_livepatch_cmd_err { + u8 code; + #define FW_LIVEPATCH_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_OPCODE 0x1UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_TARGET 0x2UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_SUPPORTED 0x3UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_INSTALLED 0x4UL + #define FW_LIVEPATCH_CMD_ERR_CODE_NOT_PATCHED 0x5UL + #define FW_LIVEPATCH_CMD_ERR_CODE_AUTH_FAIL 0x6UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_HEADER 0x7UL + #define FW_LIVEPATCH_CMD_ERR_CODE_INVALID_SIZE 0x8UL + #define FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED 0x9UL + #define FW_LIVEPATCH_CMD_ERR_CODE_LAST FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED + u8 unused_0[7]; +}; + /* hwrm_exec_fwd_resp_input (size:1024b/128B) */ struct hwrm_exec_fwd_resp_input { __le16 req_type; From 3c4153394e2c749b415947b86eb560114ec0f64d Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 29 Oct 2021 03:47:54 -0400 Subject: [PATCH 34/38] bnxt_en: implement firmware live patching Live patches are activated by using the 'limit no_reset' option when performing a devlink dev reload fw_activate operation. These packages must first be installed on the device in the usual way. For example, via devlink dev flash or ethtool -f. 
The devlink device info has also been enhanced to render stored and running live patch versions. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 173 +++++++++++++++++- 3 files changed, 174 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0e5bab75c64c..c04ea83188e2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7490,6 +7490,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_PTP_PPS; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT)) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; + if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) + bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH; bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4fecfdb430b3..d0d5da9b78f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1958,6 +1958,7 @@ struct bnxt { #define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000 #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 + #define BNXT_FW_CAP_LIVEPATCH 0x08000000 #define BNXT_FW_CAP_PTP_PPS 0x10000000 #define BNXT_FW_CAP_HOT_RESET_IF 0x20000000 #define BNXT_FW_CAP_RING_MONITOR 0x40000000 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 106f4249e47b..4007b2ac8ca4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -326,6 +326,111 @@ void bnxt_dl_health_fw_recovery_done(struct bnxt 
*bp) static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack); +static void +bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack, + struct hwrm_fw_livepatch_output *resp) +{ + int err = ((struct hwrm_err_output *)resp)->cmd_err; + + switch (err) { + case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_OPCODE: + netdev_err(bp->dev, "Illegal live patch opcode"); + NL_SET_ERR_MSG_MOD(extack, "Invalid opcode"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_NOT_SUPPORTED: + NL_SET_ERR_MSG_MOD(extack, "Live patch operation not supported"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_NOT_INSTALLED: + NL_SET_ERR_MSG_MOD(extack, "Live patch not found"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_NOT_PATCHED: + NL_SET_ERR_MSG_MOD(extack, + "Live patch deactivation failed. Firmware not patched."); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_AUTH_FAIL: + NL_SET_ERR_MSG_MOD(extack, "Live patch not authenticated"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_HEADER: + NL_SET_ERR_MSG_MOD(extack, "Incompatible live patch"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_INVALID_SIZE: + NL_SET_ERR_MSG_MOD(extack, "Live patch has invalid size"); + break; + case FW_LIVEPATCH_CMD_ERR_CODE_ALREADY_PATCHED: + NL_SET_ERR_MSG_MOD(extack, "Live patch already applied"); + break; + default: + netdev_err(bp->dev, "Unexpected live patch error: %hhd\n", err); + NL_SET_ERR_MSG_MOD(extack, "Failed to activate live patch"); + break; + } +} + +static int +bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) +{ + struct hwrm_fw_livepatch_query_output *query_resp; + struct hwrm_fw_livepatch_query_input *query_req; + struct hwrm_fw_livepatch_output *patch_resp; + struct hwrm_fw_livepatch_input *patch_req; + u32 installed = 0; + u16 flags; + u8 target; + int rc; + + if (~bp->fw_cap & BNXT_FW_CAP_LIVEPATCH) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support live patch"); + return -EOPNOTSUPP; + } + + rc = 
hwrm_req_init(bp, query_req, HWRM_FW_LIVEPATCH_QUERY); + if (rc) + return rc; + query_resp = hwrm_req_hold(bp, query_req); + + rc = hwrm_req_init(bp, patch_req, HWRM_FW_LIVEPATCH); + if (rc) { + hwrm_req_drop(bp, query_req); + return rc; + } + patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE; + patch_req->loadtype = FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL; + patch_resp = hwrm_req_hold(bp, patch_req); + + for (target = 1; target <= FW_LIVEPATCH_REQ_FW_TARGET_LAST; target++) { + query_req->fw_target = target; + rc = hwrm_req_send(bp, query_req); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query packages"); + break; + } + + flags = le16_to_cpu(query_resp->status_flags); + if (~flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL) + continue; + if ((flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) && + !strncmp(query_resp->active_ver, query_resp->install_ver, + sizeof(query_resp->active_ver))) + continue; + + patch_req->fw_target = target; + rc = hwrm_req_send(bp, patch_req); + if (rc) { + bnxt_dl_livepatch_report_err(bp, extack, patch_resp); + break; + } + installed++; + } + + if (!rc && !installed) { + NL_SET_ERR_MSG_MOD(extack, "No live patches found"); + rc = -ENOENT; + } + hwrm_req_drop(bp, query_req); + hwrm_req_drop(bp, patch_req); + return rc; +} + static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, enum devlink_reload_action action, enum devlink_reload_limit limit, @@ -372,6 +477,8 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, break; } case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: { + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + return bnxt_dl_livepatch_activate(bp, extack); if (~bp->fw_cap & BNXT_FW_CAP_HOT_RESET) { NL_SET_ERR_MSG_MOD(extack, "Device not capable, requires reboot"); return -EOPNOTSUPP; @@ -432,6 +539,8 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti unsigned long start = jiffies; unsigned long timeout = start + BNXT_DFLT_FW_RST_MAX_DSECS * HZ / 10; + if 
(limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + break; if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) timeout = start + bp->fw_health->normal_func_wait_dsecs * HZ / 10; if (!netif_running(bp->dev)) @@ -485,6 +594,7 @@ static const struct devlink_ops bnxt_dl_ops = { .flash_update = bnxt_dl_flash_update, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), .reload_down = bnxt_dl_reload_down, .reload_up = bnxt_dl_reload_up, }; @@ -630,6 +740,57 @@ static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req, return 0; } +#define BNXT_FW_SRT_PATCH "fw.srt.patch" +#define BNXT_FW_CRT_PATCH "fw.crt.patch" + +static int bnxt_dl_livepatch_info_put(struct bnxt *bp, + struct devlink_info_req *req, + const char *key) +{ + struct hwrm_fw_livepatch_query_input *query; + struct hwrm_fw_livepatch_query_output *resp; + u16 flags; + int rc; + + if (~bp->fw_cap & BNXT_FW_CAP_LIVEPATCH) + return 0; + + rc = hwrm_req_init(bp, query, HWRM_FW_LIVEPATCH_QUERY); + if (rc) + return rc; + + if (!strcmp(key, BNXT_FW_SRT_PATCH)) + query->fw_target = FW_LIVEPATCH_QUERY_REQ_FW_TARGET_SECURE_FW; + else if (!strcmp(key, BNXT_FW_CRT_PATCH)) + query->fw_target = FW_LIVEPATCH_QUERY_REQ_FW_TARGET_COMMON_FW; + else + goto exit; + + resp = hwrm_req_hold(bp, query); + rc = hwrm_req_send(bp, query); + if (rc) + goto exit; + + flags = le16_to_cpu(resp->status_flags); + if (flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) { + resp->active_ver[sizeof(resp->active_ver) - 1] = '\0'; + rc = devlink_info_version_running_put(req, key, resp->active_ver); + if (rc) + goto exit; + } + + if (flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL) { + resp->install_ver[sizeof(resp->install_ver) - 1] = '\0'; + rc = devlink_info_version_stored_put(req, key, resp->install_ver); + if (rc) + goto exit; + } + +exit: + hwrm_req_drop(bp, query); + return rc; +} + #define HWRM_FW_VER_STR_LEN 16 static int 
bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, @@ -783,8 +944,16 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", nvm_dev_info.roce_fw_major, nvm_dev_info.roce_fw_minor, nvm_dev_info.roce_fw_build, nvm_dev_info.roce_fw_patch); - return bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, - DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + if (rc) + return rc; + + rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); + if (rc) + return rc; + return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH); + } static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, From 63185eb3aa267f2844580bbd8c9c1c97516f5dbb Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 29 Oct 2021 03:47:55 -0400 Subject: [PATCH 35/38] bnxt_en: Provide stored devlink "fw" version on older firmware On older firmware that doesn't support the HWRM_NVM_GET_DEV_INFO command that returns detailed stored firmware versions, fallback to use the same firmware package version that is reported to ethtool. Refactor bnxt_get_pkgver() in bnxt_ethtool.c so that devlink can call and get the package version. Signed-off-by: Vikas Gupta Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 7 ++- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 43 +++++++++++++------ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.h | 1 + 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 4007b2ac8ca4..ce790e9b45c3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -915,8 +915,13 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, rc = bnxt_hwrm_nvm_get_dev_info(bp, &nvm_dev_info); if (rc || - !(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID)) + !(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID)) { + if (!bnxt_get_pkginfo(bp->dev, buf, sizeof(buf))) + return bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); return 0; + } buf[0] = 0; strncat(buf, nvm_dev_info.pkg_name, HWRM_FW_VER_STR_LEN); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 334ada053246..8188d55722e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2832,39 +2832,56 @@ static char *bnxt_parse_pkglog(int desired_field, u8 *data, size_t datalen) return retval; } -static void bnxt_get_pkgver(struct net_device *dev) +int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size) { struct bnxt *bp = netdev_priv(dev); u16 index = 0; char *pkgver; u32 pkglen; u8 *pkgbuf; - int len; + int rc; - if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG, - BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, - &index, NULL, &pkglen) != 0) - return; + rc = bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG, + BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, + &index, NULL, &pkglen); + if (rc) + return rc; pkgbuf = kzalloc(pkglen, GFP_KERNEL); if (!pkgbuf) { 
dev_err(&bp->pdev->dev, "Unable to allocate memory for pkg version, length = %u\n", pkglen); - return; + return -ENOMEM; } - if (bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf)) + rc = bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf); + if (rc) goto err; pkgver = bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, pkgbuf, pkglen); - if (pkgver && *pkgver != 0 && isdigit(*pkgver)) { - len = strlen(bp->fw_ver_str); - snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, - "/pkg %s", pkgver); - } + if (pkgver && *pkgver != 0 && isdigit(*pkgver)) + strscpy(ver, pkgver, size); + else + rc = -ENOENT; + err: kfree(pkgbuf); + + return rc; +} + +static void bnxt_get_pkgver(struct net_device *dev) +{ + struct bnxt *bp = netdev_priv(dev); + char buf[FW_VER_STR_LEN]; + int len; + + if (!bnxt_get_pkginfo(dev, buf, sizeof(buf))) { + len = strlen(bp->fw_ver_str); + snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, + "/pkg %s", buf); + } } static int bnxt_get_eeprom(struct net_device *dev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index 4f7eaba65dcb..6aa44840f13a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -55,6 +55,7 @@ int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type, u8 self_reset, u8 flags); int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw, u32 install_type); +int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size); void bnxt_ethtool_init(struct bnxt *bp); void bnxt_ethtool_free(struct bnxt *bp); From eff441f3b5972fd5c012ca471d471e025d7cfd5c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 29 Oct 2021 03:47:56 -0400 Subject: [PATCH 36/38] bnxt_en: Update bnxt.rst devlink documentation Add 'enable_remote_dev_reset' documentation to bnxt.rst. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- Documentation/networking/devlink/bnxt.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/networking/devlink/bnxt.rst b/Documentation/networking/devlink/bnxt.rst index 3dfd84ccb1c7..a4fb27663cd6 100644 --- a/Documentation/networking/devlink/bnxt.rst +++ b/Documentation/networking/devlink/bnxt.rst @@ -22,6 +22,8 @@ Parameters - Permanent * - ``msix_vec_per_pf_min`` - Permanent + * - ``enable_remote_dev_reset`` + - Runtime The ``bnxt`` driver also implements the following driver-specific parameters. From 7e553c44f09a8f536090904c6db5b8c9dbafa03b Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Tue, 26 Oct 2021 22:59:01 +0200 Subject: [PATCH 37/38] net: lantiq_xrx200: Hardcode the burst length value All SoCs with this IP core support 8 burst length. Hauke suggested to hardcode this value and simplify the driver. Link: https://lkml.org/lkml/2021/9/14/1533 Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index ecf1e11d9b91..0da09ea81980 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -25,6 +25,7 @@ #define XRX200_DMA_DATA_LEN (SZ_64K - 1) #define XRX200_DMA_RX 0 #define XRX200_DMA_TX 1 +#define XRX200_DMA_BURST_LEN 8 /* cpu port mac */ #define PMAC_RX_IPG 0x0024 @@ -73,9 +74,6 @@ struct xrx200_priv { struct net_device *net_dev; struct device *dev; - int tx_burst_len; - int rx_burst_len; - __iomem void *pmac_reg; }; @@ -323,7 +321,7 @@ static netdev_tx_t xrx200_start_xmit(struct sk_buff *skb, goto err_drop; /* dma needs to start on a burst length value aligned address */ - byte_offset = mapping % (priv->tx_burst_len * 4); + byte_offset = mapping % (XRX200_DMA_BURST_LEN * 4); desc->addr = mapping - byte_offset; /* Make sure the address is written before we give 
it to HW */ @@ -422,7 +420,8 @@ static int xrx200_dma_init(struct xrx200_priv *priv) int ret = 0; int i; - ltq_dma_init_port(DMA_PORT_ETOP, priv->tx_burst_len, rx_burst_len); + ltq_dma_init_port(DMA_PORT_ETOP, XRX200_DMA_BURST_LEN, + XRX200_DMA_BURST_LEN); ch_rx->dma.nr = XRX200_DMA_RX; ch_rx->dma.dev = priv->dev; @@ -531,18 +530,6 @@ static int xrx200_probe(struct platform_device *pdev) if (err) eth_hw_addr_random(net_dev); - err = device_property_read_u32(dev, "lantiq,tx-burst-length", &priv->tx_burst_len); - if (err < 0) { - dev_err(dev, "unable to read tx-burst-length property\n"); - return err; - } - - err = device_property_read_u32(dev, "lantiq,rx-burst-length", &priv->rx_burst_len); - if (err < 0) { - dev_err(dev, "unable to read rx-burst-length property\n"); - return err; - } - /* bring up the dma engine and IP core */ err = xrx200_dma_init(priv); if (err) From 0b3f86397feebe00732ad3e04daefdacc483a7f0 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Tue, 26 Oct 2021 22:59:02 +0200 Subject: [PATCH 38/38] dt-bindings: net: lantiq-xrx200-net: Remove the burst length properties All SoCs with this IP core support 8 burst length. Hauke suggested to hardcode this value and simplify the driver. Link: https://lkml.org/lkml/2021/9/14/1533 Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- .../bindings/net/lantiq,xrx200-net.yaml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml b/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml index 16d831f22063..7bc074a42369 100644 --- a/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml +++ b/Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml @@ -29,18 +29,6 @@ properties: - const: tx - const: rx - lantiq,tx-burst-length: - $ref: /schemas/types.yaml#/definitions/uint32 - description: | - TX programmable burst length. 
- enum: [2, 4, 8] - - lantiq,rx-burst-length: - $ref: /schemas/types.yaml#/definitions/uint32 - description: | - RX programmable burst length. - enum: [2, 4, 8] - '#address-cells': const: 1 @@ -53,8 +41,6 @@ required: - interrupt-parent - interrupts - interrupt-names - - lantiq,tx-burst-length - - lantiq,rx-burst-length - "#address-cells" - "#size-cells" @@ -70,6 +56,4 @@ examples: interrupt-parent = <&icu0>; interrupts = <73>, <72>; interrupt-names = "tx", "rx"; - lantiq,tx-burst-length = <8>; - lantiq,rx-burst-length = <8>; };