From 6fa4a6d20c168fa83810f03b1c43e0cec69bbdbc Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Sat, 4 Jun 2022 12:09:17 +0800 Subject: [PATCH 01/21] WAN: Fix syntax errors in comments Delete the redundant word 'the'. Signed-off-by: Xiang wangx Link: https://lore.kernel.org/r/20220604040917.8926-1-wangxiang@cdjrlc.com Signed-off-by: Paolo Abeni --- drivers/net/wan/farsync.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/farsync.h b/drivers/net/wan/farsync.h index 5f43568a9715..63908dbbb02d 100644 --- a/drivers/net/wan/farsync.h +++ b/drivers/net/wan/farsync.h @@ -43,7 +43,7 @@ * This version number is incremented with each official release of the * package and is a simplified number for normal user reference. * Individual files are tracked by the version control system and may - * have individual versions (or IDs) that move much faster than the + * have individual versions (or IDs) that move much faster than * the release version as individual updates are tracked. */ #define FST_USER_VERSION "1.04" From 12de1ebd2ae3999b5454b5d6efd0c7365ce61c41 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 4 Jun 2022 08:54:20 +0200 Subject: [PATCH 02/21] net: altera: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. While at it, move these includes below the include guard. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/18731e4f6430100d6500d6c4732ee028a729c085.1654325651.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- drivers/net/ethernet/altera/altera_utils.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h index b7d772f2dcbb..3c2e32fb7389 100644 --- a/drivers/net/ethernet/altera/altera_utils.h +++ b/drivers/net/ethernet/altera/altera_utils.h @@ -3,11 +3,12 @@ * Copyright (C) 2014 Altera Corporation. All rights reserved */ -#include - #ifndef __ALTERA_UTILS_H__ #define __ALTERA_UTILS_H__ +#include +#include + void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask); From ff8372a467fa28752610f57a6512c5365413faa2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:34 +0800 Subject: [PATCH 03/21] net: skb: move enum skb_drop_reason to standalone header file As the skb drop reasons are getting more and more, move the enum 'skb_drop_reason' and related function to the standalone header 'dropreason.h', as Jakub Kicinski suggested. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 179 +------------------------------------ include/net/dropreason.h | 184 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+), 178 deletions(-) create mode 100644 include/net/dropreason.h diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..82edf0359ab3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -43,6 +43,7 @@ #include #endif #include +#include /** * DOC: skb checksums @@ -337,184 +338,6 @@ struct sk_buff_head { struct sk_buff; -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? - * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. - */ -enum skb_drop_reason { - SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. - * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. - */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized - */ - SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ - SKB_DROP_REASON_MAX, -}; - -#define SKB_DR_INIT(name, reason) \ - enum skb_drop_reason name = SKB_DROP_REASON_##reason -#define SKB_DR(name) \ - SKB_DR_INIT(name, NOT_SPECIFIED) -#define SKB_DR_SET(name, reason) \ - (name = SKB_DROP_REASON_##reason) -#define SKB_DR_OR(name, reason) \ - do { \ - if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ - name == SKB_NOT_DROPPED_YET) \ - SKB_DR_SET(name, reason); \ - } while (0) - /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. diff --git a/include/net/dropreason.h b/include/net/dropreason.h new file mode 100644 index 000000000000..ecd18b7b1364 --- /dev/null +++ b/include/net/dropreason.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_H +#define _LINUX_DROPREASON_H + +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_NOT_DROPPED_YET = 0, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current + * host (interface is in promisc + * mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. + * see the document for rp_filter + * in ip-sysctl.rst for more + * information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as + * udp packet drop out of + * udp_memory_allocated. + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one + * expected, corresponding + * to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not + * expecting one, corresponding + * to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, + * corresponding to + * LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket + * backlog (see + * LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ + SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already + * received before (spurious retrans + * may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, + * the seq of the first byte exceed + * the right edges of receive + * window + */ + SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in + * the ofo queue, corresponding to + * LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ + SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ + SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ + SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by + * BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh + * entry + */ + SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh + * entry is full + */ + SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ + SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ + SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet + * outputting (failed to enqueue to + * current qdisc) + */ + SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to + * the per CPU backlog queue. This + * can be caused by backlog queue + * full (see netdev_max_backlog in + * net.rst) or RPS flow limit + */ + SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ + SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ + SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation + * error + */ + SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from + * user space, e.g., via + * zerocopy_sg_from_iter() + * or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_DEV_HDR, /* device driver specific + * header/metadata is invalid + */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, + SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ + SKB_DROP_REASON_MAX, +}; + +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + +#endif From ec43908dd556b2292f028c6e412261689405ba6e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:35 +0800 Subject: [PATCH 04/21] net: skb: use auto-generation to convert skb drop reason to string It is annoying to add new skb drop reasons to 'enum skb_drop_reason' and TRACE_SKB_DROP_REASON in trace/event/skb.h, and it's easy to forget to add the new reasons we added to TRACE_SKB_DROP_REASON. TRACE_SKB_DROP_REASON is used to convert drop reason of type number to string. For now, the string we passed to user space is exactly the same as the name in 'enum skb_drop_reason' with a 'SKB_DROP_REASON_' prefix. Therefore, we can use 'auto-generation' to generate these drop reasons to string at build time. The new source 'dropreason_str.c' will be auto generated during build time, which contains the string array 'const char * const drop_reasons[]'. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason.h | 2 + include/trace/events/skb.h | 89 +------------------------------------- net/core/.gitignore | 1 + net/core/Makefile | 23 +++++++++- net/core/drop_monitor.c | 13 ------ net/core/skbuff.c | 3 ++ 6 files changed, 29 insertions(+), 102 deletions(-) create mode 100644 net/core/.gitignore diff --git a/include/net/dropreason.h b/include/net/dropreason.h index ecd18b7b1364..013ff0f2543e 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -181,4 +181,6 @@ enum skb_drop_reason { SKB_DR_SET(name, reason); \ } while (0) +extern const char * const drop_reasons[]; + #endif diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a477bf907498..45264e4bb254 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,92 +9,6 @@ #include #include -#define TRACE_SKB_DROP_REASON \ - EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ - EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ - EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ - EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ - EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ - EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ - EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ - EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ - EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ - EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ - EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ - UNICAST_IN_L2_MULTICAST) \ - EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ - EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ - EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ - EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ - EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ - EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ - TCP_MD5UNEXPECTED) \ - EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ - EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ - EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ - EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ - EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ - EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ - EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ - EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ - EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ - EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ - TCP_INVALID_SEQUENCE) \ - EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ - EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ - EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ - EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ - EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ - TCP_ACK_UNSENT_DATA) \ - EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ - TCP_OFO_QUEUE_PRUNE) \ - EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ - EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ - BPF_CGROUP_EGRESS) \ - EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ - EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ - EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ - EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ - EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ - EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ - EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ - EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ - EM(SKB_DROP_REASON_XDP, XDP) \ - EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ - EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ - EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ - EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ - EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ - EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ - EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ - EM(SKB_DROP_REASON_NOMEM, NOMEM) \ - EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ - EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ - EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ - EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ - EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ - EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ - EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ - EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ - EMe(SKB_DROP_REASON_MAX, MAX) - -#undef EM -#undef EMe - -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -TRACE_SKB_DROP_REASON - -#undef EM -#undef EMe -#define EM(a, b) { a, #b }, -#define EMe(a, b) { a, #b } - /* * Tracepoint for free an sk_buff: */ @@ -121,8 +35,7 @@ TRACE_EVENT(kfree_skb, TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", __entry->skbaddr, __entry->protocol, __entry->location, - __print_symbolic(__entry->reason, - TRACE_SKB_DROP_REASON)) + drop_reasons[__entry->reason]) ); TRACE_EVENT(consume_skb, diff --git a/net/core/.gitignore b/net/core/.gitignore new file mode 100644 index 000000000000..df1e74372cce --- /dev/null +++ b/net/core/.gitignore @@ -0,0 +1 @@ +dropreason_str.c diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f737692b..e8ce3bd283a6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -4,7 +4,8 @@ # obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ + flow_dissector.o dropreason_str.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -39,3 +40,23 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o + +clean-files := dropreason_str.c + +quiet_cmd_dropreason_str = GEN $@ +cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include \n"; \ + print "const char * const drop_reasons[] = {" }\ + /^enum skb_drop/ { dr=1; }\ + /^\};/ { dr=0; }\ + /^\tSKB_DROP_REASON_/ {\ + if (dr) {\ + sub(/\tSKB_DROP_REASON_/, "", $$1);\ + printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\ + }\ + }\ + END{ print "};" }' $< > $@ + +$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h + $(call cmd,dropreason_str) + +$(obj)/dropreason_str.o: $(obj)/dropreason_str.c diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 41cac0e4834e..4ad1decce724 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,19 +48,6 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; -#undef EM -#undef EMe - -#define EM(a, b) [a] = #b, -#define EMe(a, b) [a] = #b - -/* drop_reasons is used to translate 'enum skb_drop_reason' to string, - * which is reported to user space. - */ -static const char * const drop_reasons[] = { - TRACE_SKB_DROP_REASON -}; - /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b3559cb1d82..b661040c100e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,6 +91,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* The array 'drop_reasons' is auto-generated in dropreason_str.c */ +EXPORT_SYMBOL(drop_reasons); + /** * skb_panic - private function for out-of-line support * @skb: buffer From b160f7270e6df0f4ccadcf6750696b380eb3b811 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:36 +0800 Subject: [PATCH 05/21] net: dropreason: reformat the comment fo skb drop reasons To make the code clear, reformat the comment in dropreason.h to k-doc style. Now, the comment can pass the check of kernel-doc without warnning: $ ./scripts/kernel-doc -v -none include/linux/dropreason.h include/linux/dropreason.h:7: info: Scanning doc for enum skb_drop_reason Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/net/dropreason.h | 382 +++++++++++++++++++++++---------------- 1 file changed, 226 insertions(+), 156 deletions(-) diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 013ff0f2543e..fae9b40e54fa 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -3,168 +3,238 @@ #ifndef _LINUX_DROPREASON_H #define _LINUX_DROPREASON_H -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? +/** + * enum skb_drop_reason - the reasons of skb drops * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. + * The reason of skb drop, which is used in kfree_skb_reason(). */ enum skb_drop_reason { + /** + * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) + */ SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. - * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. - */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized + /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ + SKB_DROP_REASON_NOT_SPECIFIED, + /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ + SKB_DROP_REASON_PKT_TOO_SMALL, + /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ + SKB_DROP_REASON_TCP_CSUM, + /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ + SKB_DROP_REASON_SOCKET_FILTER, + /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */ + SKB_DROP_REASON_UDP_CSUM, + /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */ + SKB_DROP_REASON_NETFILTER_DROP, + /** + * @SKB_DROP_REASON_OTHERHOST: packet don't belong to current host + * (interface is in promisc mode) + */ + SKB_DROP_REASON_OTHERHOST, + /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */ + SKB_DROP_REASON_IP_CSUM, + /** + * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_INHDR, + /** + * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the + * document for rp_filter in ip-sysctl.rst for more information + */ + SKB_DROP_REASON_IP_RPFILTER, + /** + * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is + * multicast, but L3 is unicast. + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, + /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */ + SKB_DROP_REASON_XFRM_POLICY, + /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */ + SKB_DROP_REASON_IP_NOPROTO, + /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ + SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet + * drop out of udp_memory_allocated. + */ + SKB_DROP_REASON_PROTO_MEM, + /** + * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, + * corresponding to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, + /** + * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting + * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and its wrong, corresponding + * to LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_TCP_MD5FAILURE, + /** + * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( + * see LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_SOCKET_BACKLOG, + /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ + SKB_DROP_REASON_TCP_FLAGS, + /** + * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_ZEROWINDOW, + /** + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already + * received before (spurious retrans may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OLD_DATA, + /** + * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window, + * the seq of the first byte exceed the right edges of receive + * window + */ + SKB_DROP_REASON_TCP_OVERWINDOW, + /** + * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo + * queue, corresponding to LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_OFOMERGE, + /** + * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ + SKB_DROP_REASON_TCP_RESET, + /** + * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected + * SYN flag + */ + SKB_DROP_REASON_TCP_INVALID_SYN, + /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_CLOSE, + /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_FASTOPEN, + /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_OLD_ACK, + /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, + /** + * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't + * sent yet + */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, + /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, + /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */ + SKB_DROP_REASON_TCP_OFO_DROP, + /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */ + SKB_DROP_REASON_IP_OUTNOROUTES, + /** + * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, + /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */ + SKB_DROP_REASON_IPV6DISABLED, + /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, + /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_FAILED, + /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, + /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */ + SKB_DROP_REASON_NEIGH_DEAD, + /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ + SKB_DROP_REASON_TC_EGRESS, + /** + * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( + * failed to enqueue to current qdisc) + */ + SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU + * backlog queue. This can be caused by backlog queue full (see + * netdev_max_backlog in net.rst) or RPS flow limit + */ + SKB_DROP_REASON_CPU_BACKLOG, + /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ + SKB_DROP_REASON_XDP, + /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ + SKB_DROP_REASON_TC_INGRESS, + /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */ + SKB_DROP_REASON_UNHANDLED_PROTO, + /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */ + SKB_DROP_REASON_SKB_CSUM, + /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ + SKB_DROP_REASON_SKB_GSO_SEG, + /** + * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, + * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, + /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */ + SKB_DROP_REASON_DEV_HDR, + /** + * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to + * any of its data structure that is not up/ready/initialized, + * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq] + * is not initialized */ SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ + /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */ + SKB_DROP_REASON_FULL_RING, + /** @SKB_DROP_REASON_NOMEM: error due to OOM */ + SKB_DROP_REASON_NOMEM, + /** + * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from + * networking data, e.g., failed to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_HDR_TRUNC, + /** + * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached + * to tun/tap, e.g., via TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_FILTER, + /** + * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at + * tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_TAP_TXFILTER, + /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */ + SKB_DROP_REASON_ICMP_CSUM, + /** + * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211, + * such as a broadcasts ICMP_TIMESTAMP + */ + SKB_DROP_REASON_INVALID_PROTO, + /** + * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INADDRERRORS, + /** + * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, + /** + * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the + * MTU) + */ + SKB_DROP_REASON_PKT_TOO_BIG, + /** + * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be + * used as a real 'reason' + */ SKB_DROP_REASON_MAX, }; From c87c938f62d8f1f7c24620859d67f2e3eca23afc Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Mon, 11 Apr 2022 14:07:14 +0200 Subject: [PATCH 06/21] i40e: Add VF VLAN pruning VFs by default are able to see all tagged traffic regardless of trust and VLAN filters configured. Add new private flag vf-vlan-pruning that allows changing of default VF behavior for tagged traffic. When the flag is turned on untrusted VF will only be able to receive untagged traffic or traffic with VLAN tags it has created interfaces for The flag is off by default and can only be changed if there are no VFs spawned on the PF. This flag will only be effective when no PVID is set on VF and VF is not trusted. Add new function that computes the correct VLAN ID for VF VLAN filters based on trust, PVID, vf-vlan-prune-disable flag and current VLAN ID. Testing Hints: Test 1: vf-vlan-pruning == off ============================== 1. Set the private flag > ethtool --set-priv-flag eth0 vf-vlan-pruning off (default setting) 2. Use scapy to send any VLAN tagged traffic and make sure the VF receives all VLAN tagged traffic that matches its destination MAC filters (unicast, multicast, and broadcast). Test 2: vf-vlan-pruning == on ============================== 1. Set the private flag > ethtool --set-priv-flag eth0 vf-vlan-pruning on 2. Use scapy to send any VLAN tagged traffic and make sure the VF does not receive any VLAN tagged traffic that matches its destination MAC filters (unicast, multicast, and broadcast). 3. Add a VLAN filter on the VF netdev > ip link add link eth0v0 name vlan10 type vlan id 10 4. Bring the VLAN netdev up > ip link set vlan10 up 4. Use scapy to send traffic with VLAN 10, VLAN 11 (anything not VLAN 10), and untagged traffic. Make sure the VF only receives VLAN 10 and untagged traffic when the link partner is sending. Test 3: vf-vlan-pruning == off && VF is in a port VLAN ============================== 1. Set the private flag > ethtool --set-priv-flag eth0 vf-vlan-pruning off (default setting) 2. Create a VF > echo 1 > sriov_numvfs 3. Put the VF in a port VLAN > ip link set eth0 vf 0 vlan 10 4. Use scapy to send traffic with VLAN 10 and VLAN 11 (anything not VLAN 10) and make sure the VF only receives untagged traffic when the link partner is sending VLAN 10 tagged traffic as the VLAN tag is expected to be stripped by HW for port VLANs and not visible to the VF. Test 4: Change vf-vlan-pruning while VFs are created ============================== echo 0 > sriov_numvfs ethtool --set-priv-flag eth0 vf-vlan-pruning off echo 1 > sriov_numvfs ethtool --set-priv-flag eth0 vf-vlan-pruning on (expect failure) Signed-off-by: Sylwester Dziedziuch Signed-off-by: Przemyslaw Patynowski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../net/ethernet/intel/i40e/i40e_ethtool.c | 9 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 135 +++++++++++++++++- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 +- 4 files changed, 147 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 18558a019353..57f4ec4f8d2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -565,6 +565,7 @@ struct i40e_pf { #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) #define I40E_FLAG_RS_FEC BIT(25) #define I40E_FLAG_BASE_R_FEC BIT(26) +#define I40E_FLAG_VF_VLAN_PRUNING BIT(27) /* TOTAL_PORT_SHUTDOWN * Allows to physically disable the link on the NIC's port. * If enabled, (after link down request from the OS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 610f00cbaff9..c65e9e2dcb42 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -457,6 +457,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), + I40E_PRIV_FLAG("vf-vlan-pruning", + I40E_FLAG_VF_VLAN_PRUNING, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -5285,6 +5287,13 @@ flags_complete: return -EOPNOTSUPP; } + if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) && + pf->num_alloc_vfs) { + dev_warn(&pf->pdev->dev, + "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n"); + return -EOPNOTSUPP; + } + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && (new_flags & I40E_FLAG_MFP_ENABLED)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 332a608dbaa6..1599ac538e7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1368,6 +1368,114 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, return 0; } +/** + * i40e_get_vf_new_vlan - Get new vlan id on a vf + * @vsi: the vsi to configure + * @new_mac: new mac filter to be added + * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Get new VLAN id based on current VLAN filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * Returns the value of the new vlan filter or + * the old value if no new filter is needed. + */ +static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, + struct i40e_new_mac_filter *new_mac, + struct i40e_mac_filter *f, + int vlan_filters, + bool trusted) +{ + s16 pvid = le16_to_cpu(vsi->info.pvid); + struct i40e_pf *pf = vsi->back; + bool is_any; + + if (new_mac) + f = new_mac->f; + + if (pvid && f->vlan != pvid) + return pvid; + + is_any = (trusted || + !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING)); + + if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || + (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || + (is_any && !vlan_filters && f->vlan == 0)) { + if (is_any) + return I40E_VLAN_ANY; + else + return 0; + } + + return f->vlan; +} + +/** + * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary + * @vsi: the vsi to configure + * @tmp_add_list: list of filters ready to be added + * @tmp_del_list: list of filters ready to be deleted + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Correct VF VLAN filters based on current VLAN filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. + * + * This function is only expected to be called from within + * i40e_sync_vsi_filters. + * + * NOTE: This function expects to be called while under the + * mac_filter_hash_lock + */ +static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, + struct hlist_head *tmp_add_list, + struct hlist_head *tmp_del_list, + int vlan_filters, + bool trusted) +{ + struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new_mac; + struct hlist_node *h; + int bkt, new_vlan; + + hlist_for_each_entry(new_mac, tmp_add_list, hlist) { + new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL, + vlan_filters, trusted); + } + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters, + trusted); + if (new_vlan != f->vlan) { + add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); + if (!add_head) + return -ENOMEM; + /* Create a temporary i40e_new_mac_filter */ + new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC); + if (!new_mac) + return -ENOMEM; + new_mac->f = add_head; + new_mac->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new_mac->hlist, tmp_add_list); + + /* Put the original filter into the delete list */ + f->state = I40E_FILTER_REMOVE; + hash_del(&f->hlist); + hlist_add_head(&f->hlist, tmp_del_list); + } + } + + vsi->has_vlan_filter = !!vlan_filters; + return 0; +} + /** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI @@ -2423,10 +2531,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) vlan_filters++; } - retval = i40e_correct_mac_vlan_filters(vsi, - &tmp_add_list, - &tmp_del_list, - vlan_filters); + if (vsi->type != I40E_VSI_SRIOV) + retval = i40e_correct_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters); + else + retval = i40e_correct_vf_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters, pf->vf[vsi->vf_id].trusted); hlist_for_each_entry(new, &tmp_add_list, hlist) netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); @@ -2855,8 +2967,21 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { - if (f->state == I40E_FILTER_REMOVE) + /* If we're asked to add a filter that has been marked for + * removal, it is safe to simply restore it to active state. + * __i40e_del_filter will have simply deleted any filters which + * were previously marked NEW or FAILED, so if it is currently + * marked REMOVE it must have previously been ACTIVE. Since we + * haven't yet run the sync filters task, just restore this + * filter to the ACTIVE state so that the sync task leaves it + * in place. + */ + if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) { + f->state = I40E_FILTER_ACTIVE; continue; + } else if (f->state == I40E_FILTER_REMOVE) { + continue; + } add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 2606e8f0f19b..9949469333d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4349,6 +4349,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; + i40e_vlan_stripping_enable(vsi); i40e_vc_reset_vf(vf, true); /* During reset the VF got a new VSI, so refresh a pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; @@ -4364,7 +4365,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * MAC addresses deleted. */ if ((!(vlan_id || qos) || - vlanprio != le16_to_cpu(vsi->info.pvid)) && + vlanprio != le16_to_cpu(vsi->info.pvid)) && vsi->info.pvid) { ret = i40e_add_vlan_all_mac(vsi, I40E_VLAN_ANY); if (ret) { @@ -4727,6 +4728,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; + + /* request PF to sync mac/vlan filters for the VF */ + set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); + pf->vsi[vf->lan_vsi_idx]->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); From 35a2443d0910fdd6ce29d4f724447ad7029e8f23 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Fri, 20 May 2022 12:07:13 +0200 Subject: [PATCH 07/21] iavf: Add waiting for response from PF in set mac Make iavf_set_mac synchronous by waiting for a response from a PF. Without this iavf_set_mac is always returning success even though set_mac can be rejected by a PF. This ensures that when set_mac exits netdev MAC is updated. This is needed for sending ARPs with correct MAC after changing VF's MAC. This is also needed by bonding module. Signed-off-by: Sylwester Dziedziuch Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 7 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 127 +++++++++++++++--- .../net/ethernet/intel/iavf/iavf_virtchnl.c | 61 ++++++++- 3 files changed, 174 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 49aed3e506a6..fda1198d2c00 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -146,7 +146,8 @@ struct iavf_mac_filter { u8 remove:1; /* filter needs to be removed */ u8 add:1; /* filter needs to be added */ u8 is_primary:1; /* filter is a default VF MAC */ - u8 padding:4; + u8 add_handled:1; /* received response for filter add */ + u8 padding:3; }; }; @@ -248,6 +249,7 @@ struct iavf_adapter { struct work_struct adminq_task; struct delayed_work client_task; wait_queue_head_t down_waitqueue; + wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; struct list_head mac_filter_list; @@ -292,6 +294,7 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) +#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ @@ -559,6 +562,8 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac); void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7dfcf78b57fb..95772e17e5be 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -983,6 +983,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->add_handled = false; f->is_new_mac = true; f->is_primary = false; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; @@ -994,47 +995,132 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, } /** - * iavf_set_mac - NDO callback to set port mac address - * @netdev: network interface device structure - * @p: pointer to an address structure + * iavf_replace_primary_mac - Replace current primary address + * @adapter: board private structure + * @new_mac: new MAC address to be applied * - * Returns 0 on success, negative on failure + * Replace current dev_addr and send request to PF for removal of previous + * primary MAC address filter and addition of new primary MAC filter. + * Return 0 for success, -ENOMEM for failure. + * + * Do not call this with mac_vlan_list_lock! **/ -static int iavf_set_mac(struct net_device *netdev, void *p) +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { - struct iavf_adapter *adapter = netdev_priv(netdev); struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) - return 0; spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry(f, &adapter->mac_filter_list, list) { + f->is_primary = false; + } + f = iavf_find_filter(adapter, hw->mac.addr); if (f) { f->remove = true; - f->is_primary = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - f = iavf_add_filter(adapter, addr->sa_data); + f = iavf_add_filter(adapter, new_mac); + if (f) { + /* Always send the request to add if changing primary MAC + * even if filter is already present on the list + */ f->is_primary = true; - ether_addr_copy(hw->mac.addr, addr->sa_data); + f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); } spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) + if (f) { queue_work(iavf_wq, &adapter->watchdog_task.work); + return 0; + } + return -ENOMEM; +} - return (f == NULL) ? -ENOMEM : 0; +/** + * iavf_is_mac_set_handled - wait for a response to set MAC from PF + * @netdev: network interface device structure + * @macaddr: MAC address to set + * + * Returns true on success, false on failure + */ +static bool iavf_is_mac_set_handled(struct net_device *netdev, + const u8 *macaddr) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_mac_filter *f; + bool ret = false; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + f = iavf_find_filter(adapter, macaddr); + + if (!f || (!f->add && f->add_handled)) + ret = true; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + return ret; +} + +/** + * iavf_set_mac - NDO callback to set port MAC address + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int iavf_set_mac(struct net_device *netdev, void *p) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + bool handle_mac = iavf_is_mac_set_handled(netdev, addr->sa_data); + int ret; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ret = iavf_replace_primary_mac(adapter, addr->sa_data); + + if (ret) + return ret; + + /* If this is an initial set MAC during VF spawn do not wait */ + if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { + adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; + return 0; + } + + if (handle_mac) + goto done; + + ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, false, msecs_to_jiffies(2500)); + + /* If ret < 0 then it means wait was interrupted. + * If ret == 0 then it means we got a timeout. + * else it means we got response for set MAC from PF, + * check if netdev MAC was updated to requested MAC, + * if yes then set MAC succeeded otherwise it failed return -EACCES + */ + if (ret < 0) + return ret; + + if (!ret) + return -EAGAIN; + +done: + if (!ether_addr_equal(netdev->dev_addr, addr->sa_data)) + return -EACCES; + + return 0; } /** @@ -2451,6 +2537,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } + adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; + adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -4681,6 +4769,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); + /* Setup the wait queue for indicating virtchannel events */ + init_waitqueue_head(&adapter->vc_waitqueue); + return 0; err_ioremap: diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 782450d5c12f..e2b4ba98f71e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -598,6 +598,8 @@ static void iavf_mac_add_ok(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { f->is_new_mac = false; + if (!f->add && !f->add_handled) + f->add_handled = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -618,6 +620,9 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter) if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) f->remove = false; + if (!f->add && !f->add_handled) + f->add_handled = true; + if (f->is_new_mac) { list_del(&f->list); kfree(f); @@ -1932,6 +1937,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_mac_add_reject(adapter); /* restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_DEL_VLAN: dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", @@ -2091,7 +2097,13 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!v_retval) iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) - eth_hw_addr_set(netdev, adapter->hw.mac.addr); + if (!ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr)) { + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + } + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = @@ -2121,10 +2133,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, /* restore current mac address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); } else { + netif_addr_lock_bh(netdev); /* refresh current mac address if changed */ - eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); } spin_lock_bh(&adapter->mac_vlan_list_lock); iavf_add_filter(adapter, adapter->hw.mac.addr); @@ -2160,6 +2173,10 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } fallthrough; case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: { + struct iavf_mac_filter *f; + bool was_mac_changed; + u64 aq_required = 0; + if (v_opcode == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS) memcpy(&adapter->vlan_v2_caps, msg, min_t(u16, msglen, @@ -2167,6 +2184,46 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + was_mac_changed = !ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* re-add all MAC filters */ + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (was_mac_changed && + ether_addr_equal(netdev->dev_addr, f->macaddr)) + ether_addr_copy(f->macaddr, + adapter->hw.mac.addr); + + f->is_new_mac = true; + f->add = true; + f->add_handled = false; + f->remove = false; + } + + /* re-add all VLAN filters */ + if (VLAN_FILTERING_ALLOWED(adapter)) { + struct iavf_vlan_filter *vlf; + + if (!list_empty(&adapter->vlan_filter_list)) { + list_for_each_entry(vlf, + &adapter->vlan_filter_list, + list) + vlf->add = true; + + aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + } + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER | + aq_required; } break; case VIRTCHNL_OP_ENABLE_QUEUES: From 67074ae6af59c7d9112535e6f8449bc59f66df40 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 4 Jun 2022 08:15:21 +0200 Subject: [PATCH 08/21] net: dsa: microchip: ksz8xxx: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/41d99ef8629e1db03d4f2662f5556611e0b94652.1654323308.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h index 03da369675c6..cae76f5e7787 100644 --- a/drivers/net/dsa/microchip/ksz8.h +++ b/drivers/net/dsa/microchip/ksz8.h @@ -7,7 +7,8 @@ #ifndef __KSZ8XXX_H #define __KSZ8XXX_H -#include + +#include enum ksz_regs { REG_IND_CTRL_0, From da6e113ff010815fdd21ee1e9af2e8d179a2680f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 6 Jun 2022 21:49:00 +0200 Subject: [PATCH 09/21] net: ethernet: mtk_eth_soc: enable rx cksum offload for MTK_NETSYS_V2 Enable rx checksum offload for mt7986 chipset. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c8699805c18f7fd38315fcb8da2787676d83a32c.1654544585.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b3b3c079a0fa..9d7700984e9b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1433,8 +1433,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, int done = 0, bytes = 0; while (done < budget) { + unsigned int pktlen, *rxdcsum; struct net_device *netdev; - unsigned int pktlen; dma_addr_t dma_addr; u32 hash, reason; int mac = 0; @@ -1498,7 +1498,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; skb_put(skb, pktlen); - if (trxd.rxd4 & eth->soc->txrx.rx_dma_l4_valid) + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + rxdcsum = &trxd.rxd3; + else + rxdcsum = &trxd.rxd4; + + if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb_checksum_none_assert(skb); @@ -3744,6 +3750,7 @@ static const struct mtk_soc_data mt7986_data = { .txd_size = sizeof(struct mtk_tx_dma_v2), .rxd_size = sizeof(struct mtk_rx_dma_v2), .rx_irq_done_mask = MTK_RX_DONE_INT_V2, + .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, From 17e9157c4ed00a90fcf34d9caffee90d194ebfa3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 7 Jun 2022 14:51:03 +0200 Subject: [PATCH 10/21] nfp: Remove kernel.h when not needed When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Remove kernel.h when it is not needed. Signed-off-by: Christophe JAILLET Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20220607125103.487801-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h index afab6f0fc564..6ad43c7cefe6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h @@ -4,7 +4,6 @@ #ifndef NFP_CRC32_H #define NFP_CRC32_H -#include #include /** From a84a434baf9427a1c49782fb1f0973d1308016df Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Mon, 6 Jun 2022 07:34:58 -0400 Subject: [PATCH 11/21] net: constify some inline functions in sock.h Despite these inline functions having full visibility to the compiler at compile time, they still strip const from passed pointers. This change allows for functions in various network drivers to be marked as const that could not be marked const before. Signed-off-by: Peter Lafreniere Link: https://lore.kernel.org/r/20220606113458.35953-1-pjlafren@mtu.edu Signed-off-by: Jakub Kicinski --- include/net/sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index c585ef6565d9..657873e2d90f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -611,7 +611,7 @@ void sock_net_set(struct sock *sk, struct net *net) int sk_set_peek_off(struct sock *sk, int val); -static inline int sk_peek_offset(struct sock *sk, int flags) +static inline int sk_peek_offset(const struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { return READ_ONCE(sk->sk_peek_off); @@ -863,7 +863,7 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ pos = rcu_dereference(hlist_next_rcu(pos))) -static inline struct user_namespace *sk_user_ns(struct sock *sk) +static inline struct user_namespace *sk_user_ns(const struct sock *sk) { /* Careful only use this in a context where these parameters * can not change and must all be valid, such as recvmsg from @@ -909,7 +909,7 @@ enum sock_flags { #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) -static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) +static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { nsk->sk_flags = osk->sk_flags; } From 5834e72eda0b7e5767eb107259d98eef19ebd11f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 8 Jun 2022 06:37:26 +0200 Subject: [PATCH 12/21] xen/netback: do some code cleanup Remove some unused macros and functions, make local functions static. Signed-off-by: Juergen Gross Acked-by: Wei Liu Link: https://lore.kernel.org/r/20220608043726.9380-1-jgross@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/common.h | 12 ------------ drivers/net/xen-netback/interface.c | 16 +--------------- drivers/net/xen-netback/netback.c | 4 +++- drivers/net/xen-netback/rx.c | 2 +- 4 files changed, 5 insertions(+), 29 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index d9dea4829c86..8174d7b2966c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -48,7 +48,6 @@ #include typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) struct pending_tx_info { struct xen_netif_tx_request req; /* tx request */ @@ -82,8 +81,6 @@ struct xenvif_rx_meta { /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF -#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE - #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE /* The maximum number of frags is derived from the size of a grant (same @@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif); int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); -int xenvif_schedulable(struct xenvif *vif); - -int xenvif_queue_stopped(struct xenvif_queue *queue); -void xenvif_wake_queue(struct xenvif_queue *queue); - /* (Un)Map communication rings. */ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, @@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); @@ -403,9 +394,6 @@ void xenvif_carrier_on(struct xenvif *vif); void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, bool zerocopy_success); -/* Unmap a pending page and release it back to the guest */ -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); - static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) { return MAX_PENDING_REQS - diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 8e035374a370..fb32ae82d9b0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -69,7 +69,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) wake_up(&queue->dealloc_wq); } -int xenvif_schedulable(struct xenvif *vif) +static int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && test_bit(VIF_STATUS_CONNECTED, &vif->status) && @@ -177,20 +177,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -int xenvif_queue_stopped(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); -} - -void xenvif_wake_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); -} - static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d93814c14a23..fc61a4418737 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static void push_tx_responses(struct xenvif_queue *queue); +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); + static inline int tx_work_todo(struct xenvif_queue *queue); static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, @@ -1418,7 +1420,7 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; struct gnttab_unmap_grant_ref tx_unmap_op; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..8df2c736fd23 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) #define RX_BATCH_SIZE 64 -void xenvif_rx_action(struct xenvif_queue *queue) +static void xenvif_rx_action(struct xenvif_queue *queue) { struct sk_buff_head completed_skbs; unsigned int work_done = 0; From 55f0395fcace9e675af2cbb96015ce1ae8856806 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:46 -0700 Subject: [PATCH 13/21] vmxnet3: prepare for version 7 changes vmxnet3 is currently at version 6 and this patch initiates the preparation to accommodate changes for upto version 7. Introduced utility macros for vmxnet3 version 7 comparison and update Copyright information. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/Makefile | 2 +- drivers/net/vmxnet3/upt1_defs.h | 2 +- drivers/net/vmxnet3/vmxnet3_defs.h | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 +- drivers/net/vmxnet3/vmxnet3_int.h | 5 ++++- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index 7a38925f4165..a666a88ac1ff 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2022, VMware, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index f9f3a23d1698..41c0660a0c54 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 74d4e8bc4abc..9f91ebb10137 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 93e8d119d45f..6fc6a2a26161 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 3172d46c0335..e41e76757c5b 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 7027ff483fa5..5251c3439d6a 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -81,6 +81,7 @@ #define VMXNET3_RSS #endif +#define VMXNET3_REV_7 6 /* Vmxnet3 Rev. 7 */ #define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */ #define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */ #define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ @@ -431,6 +432,8 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_5 + 1) #define VMXNET3_VERSION_GE_6(adapter) \ (adapter->version >= VMXNET3_REV_6 + 1) +#define VMXNET3_VERSION_GE_7(adapter) \ + (adapter->version >= VMXNET3_REV_7 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 From 6f91f4ba046e5de6a6e579620b32b8ecf56873f7 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:47 -0700 Subject: [PATCH 14/21] vmxnet3: add support for capability registers This patch enhances vmxnet3 to suuport capability registers which allows it to enable features selectively. The DCR register tracks the capabilities vmxnet3 device supports. The PTCR register states the capabilities that the passthrough device supports. With the help of these registers, vmxnet3 can enable only those features which the passthrough device supoprts. This allows smooth trasition to Uniform-Passthrough (UPT) mode if the virtual nic requests it. If PTCR register returns nothing or error it means UPT is not being requested and vnic will continue in emulation mode. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_defs.h | 37 +++++++++- drivers/net/vmxnet3/vmxnet3_drv.c | 97 +++++++++++++++++++++++++ drivers/net/vmxnet3/vmxnet3_ethtool.c | 101 ++++++++++++++++++++++++-- drivers/net/vmxnet3/vmxnet3_int.h | 4 + 4 files changed, 233 insertions(+), 6 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 9f91ebb10137..0157155ff677 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -40,7 +40,13 @@ enum { VMXNET3_REG_MACL = 0x28, /* MAC Address Low */ VMXNET3_REG_MACH = 0x30, /* MAC Address High */ VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */ - VMXNET3_REG_ECR = 0x40 /* Event Cause Register */ + VMXNET3_REG_ECR = 0x40, /* Event Cause Register */ + VMXNET3_REG_DCR = 0x48, /* Device capability register, + * from 0x48 to 0x80 + */ + VMXNET3_REG_PTCR = 0x88, /* Passthru capbility register + * from 0x88 to 0xb0 + */ }; /* BAR 0 */ @@ -101,6 +107,9 @@ enum { VMXNET3_CMD_GET_RESERVED2, VMXNET3_CMD_GET_RESERVED3, VMXNET3_CMD_GET_MAX_QUEUES_CONF, + VMXNET3_CMD_GET_RESERVED4, + VMXNET3_CMD_GET_MAX_CAPABILITIES, + VMXNET3_CMD_GET_DCR0_REG, }; /* @@ -801,4 +810,30 @@ struct Vmxnet3_DriverShared { #define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ #define VMXNET3_LINK_DOWN 0 +#define VMXNET3_DCR_ERROR 31 /* error when bit 31 of DCR is set */ +#define VMXNET3_CAP_UDP_RSS 0 /* bit 0 of DCR 0 */ +#define VMXNET3_CAP_ESP_RSS_IPV4 1 /* bit 1 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD 2 /* bit 2 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_TSO 3 /* bit 3 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD 4 /* bit 4 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_TSO 5 /* bit 5 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD 6 /* bit 6 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD 7 /* bit 7 of DCR 0 */ +#define VMXNET3_CAP_PKT_STEERING_IPV4 8 /* bit 8 of DCR 0 */ +#define VMXNET3_CAP_VERSION_4_MAX VMXNET3_CAP_PKT_STEERING_IPV4 +#define VMXNET3_CAP_ESP_RSS_IPV6 9 /* bit 9 of DCR 0 */ +#define VMXNET3_CAP_VERSION_5_MAX VMXNET3_CAP_ESP_RSS_IPV6 +#define VMXNET3_CAP_ESP_OVER_UDP_RSS 10 /* bit 10 of DCR 0 */ +#define VMXNET3_CAP_INNER_RSS 11 /* bit 11 of DCR 0 */ +#define VMXNET3_CAP_INNER_ESP_RSS 12 /* bit 12 of DCR 0 */ +#define VMXNET3_CAP_CRC32_HASH_FUNC 13 /* bit 13 of DCR 0 */ +#define VMXNET3_CAP_VERSION_6_MAX VMXNET3_CAP_CRC32_HASH_FUNC +#define VMXNET3_CAP_OAM_FILTER 14 /* bit 14 of DCR 0 */ +#define VMXNET3_CAP_ESP_QS 15 /* bit 15 of DCR 0 */ +#define VMXNET3_CAP_LARGE_BAR 16 /* bit 16 of DCR 0 */ +#define VMXNET3_CAP_OOORX_COMP 17 /* bit 17 of DCR 0 */ +#define VMXNET3_CAP_VERSION_7_MAX 18 +/* when new capability is introduced, update VMXNET3_CAP_MAX */ +#define VMXNET3_CAP_MAX VMXNET3_CAP_VERSION_7_MAX + #endif /* _VMXNET3_DEFS_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6fc6a2a26161..edc4f23d4965 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -130,6 +130,20 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue)); } +/* Check if capability is supported by UPT device or + * UPT is even requested + */ +bool +vmxnet3_check_ptcapability(u32 cap_supported, u32 cap) +{ + if (cap_supported & (1UL << VMXNET3_DCR_ERROR) || + cap_supported & (1UL << cap)) { + return true; + } + + return false; +} + /* * Check the link state. This may start or stop the tx queue. @@ -2671,6 +2685,36 @@ vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter) adapter->rss_fields = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); } else { + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } cmdInfo->setRssFields = adapter->rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_SET_RSS_FIELDS); @@ -3185,6 +3229,47 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter) NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + } + netdev->vlan_features = netdev->hw_features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); @@ -3520,6 +3605,18 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_ver; } + if (VMXNET3_VERSION_GE_7(adapter)) { + adapter->devcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_DCR); + adapter->ptcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_PTCR); + if (adapter->dev_caps[0]) + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } + if (VMXNET3_VERSION_GE_6(adapter)) { spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index e41e76757c5b..458f2da1ebab 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -298,7 +298,7 @@ netdev_features_t vmxnet3_features_check(struct sk_buff *skb, return features; } -static void vmxnet3_enable_encap_offloads(struct net_device *netdev) +static void vmxnet3_enable_encap_offloads(struct net_device *netdev, netdev_features_t features) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -306,8 +306,50 @@ static void vmxnet3_enable_encap_offloads(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + NETIF_F_LRO; + if (features & NETIF_F_GSO_UDP_TUNNEL) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } } } @@ -322,6 +364,22 @@ static void vmxnet3_disable_encap_offloads(struct net_device *netdev) NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM); } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_GENEVE_TSO | + 1UL << VMXNET3_CAP_VXLAN_TSO | + 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } } int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) @@ -357,8 +415,8 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_RXVLAN; - if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) { - vmxnet3_enable_encap_offloads(netdev); + if ((features & tun_offload_mask) != 0) { + vmxnet3_enable_encap_offloads(netdev, features); adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXINNEROFLD; } else if ((features & tun_offload_mask) == 0 && @@ -913,6 +971,39 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev, union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; unsigned long flags; + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, + adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, + VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } spin_lock_irqsave(&adapter->cmd_lock, flags); cmdInfo->setRssFields = rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5251c3439d6a..a7c8f80702c2 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -403,6 +403,9 @@ struct vmxnet3_adapter { dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; bool queuesExtEnabled; + u32 devcap_supported[8]; + u32 ptcap_supported[8]; + u32 dev_caps[8]; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -497,6 +500,7 @@ void vmxnet3_set_ethtool_ops(struct net_device *netdev); void vmxnet3_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +bool vmxnet3_check_ptcapability(u32 cap_supported, u32 cap); extern char vmxnet3_driver_name[]; #endif From 543fb67405410cc548a72d7a9a4087688d2f56ac Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:48 -0700 Subject: [PATCH 15/21] vmxnet3: add support for large passthrough BAR register For vmxnet3 to work in UPT mode, the BAR sizes have been increased. The PT page has been extended to 2 pages and also includes OOB pages as a part of PT BAR. This patch enhances vmxnet3 to use appropriate BAR offsets based on the capability registered. To use new offsets, VMXNET3_CAP_LARGE_BAR needs to be set by the device. If it is not set then the device will use legacy PT page layout. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_defs.h | 14 ++++++++++++-- drivers/net/vmxnet3/vmxnet3_drv.c | 25 ++++++++++++++++++++----- drivers/net/vmxnet3/vmxnet3_ethtool.c | 6 +++--- drivers/net/vmxnet3/vmxnet3_int.h | 3 +++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 0157155ff677..8d626611ab2d 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -57,8 +57,18 @@ enum { VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */ }; -#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ -#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +/* For Large PT BAR, the following offset to DB register */ +enum { + VMXNET3_REG_LB_TXPROD = 0x1000, /* Tx Producer Index */ + VMXNET3_REG_LB_RXPROD = 0x1400, /* Rx Producer Index for ring 1 */ + VMXNET3_REG_LB_RXPROD2 = 0x1800, /* Rx Producer Index for ring 2 */ +}; + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_LARGE_PT_REG_SIZE 8192 /* large PT pages */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +#define VMXNET3_LARGE_BAR0_REG_SIZE (4096 * 4096) /* LARGE BAR 0 */ +#define VMXNET3_OOB_REG_SIZE (4094 * 4096) /* OOB pages */ #define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */ #define VMXNET3_REG_ALIGN_MASK 0x7 diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index edc4f23d4965..93f237db463d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1207,7 +1207,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_TXPROD + tq->qid * 8, + adapter->tx_prod_offset + tq->qid * 8, tq->tx_ring.next2fill); } @@ -1359,8 +1359,8 @@ static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter, int quota) { - static const u32 rxprod_reg[2] = { - VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 + u32 rxprod_reg[2] = { + adapter->rx_prod_offset, adapter->rx_prod2_offset }; u32 num_pkts = 0; bool skip_page_frags = false; @@ -2783,9 +2783,9 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN, + adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN, adapter->rx_queue[i].rx_ring[0].next2fill); - VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 + + VMXNET3_WRITE_BAR0_REG(adapter, (adapter->rx_prod2_offset + (i * VMXNET3_REG_ALIGN)), adapter->rx_queue[i].rx_ring[1].next2fill); } @@ -3608,6 +3608,10 @@ vmxnet3_probe_device(struct pci_dev *pdev, if (VMXNET3_VERSION_GE_7(adapter)) { adapter->devcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_DCR); adapter->ptcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_PTCR); + if (adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->dev_caps[0] = adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_LARGE_BAR); + } if (adapter->dev_caps[0]) VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); @@ -3617,6 +3621,17 @@ vmxnet3_probe_device(struct pci_dev *pdev, spin_unlock_irqrestore(&adapter->cmd_lock, flags); } + if (VMXNET3_VERSION_GE_7(adapter) && + adapter->dev_caps[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->tx_prod_offset = VMXNET3_REG_LB_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_LB_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_LB_RXPROD2; + } else { + adapter->tx_prod_offset = VMXNET3_REG_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_RXPROD2; + } + if (VMXNET3_VERSION_GE_6(adapter)) { spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 458f2da1ebab..397b268f7bc5 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -520,7 +520,7 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_tx_queues; i++) { struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_TXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->tx_prod_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(tq->tx_ring.basePA); @@ -548,9 +548,9 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_rx_queues; i++) { struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN); - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD2 + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod2_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(rq->rx_ring[0].basePA); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index a7c8f80702c2..a4f832f0ad5b 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -406,6 +406,9 @@ struct vmxnet3_adapter { u32 devcap_supported[8]; u32 ptcap_supported[8]; u32 dev_caps[8]; + u16 tx_prod_offset; + u16 rx_prod_offset; + u16 rx_prod2_offset; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ From 2c5a5748105a6bb901579d365c6f93e79f282b69 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:49 -0700 Subject: [PATCH 16/21] vmxnet3: add support for out of order rx completion Currently, vmxnet3 processes rx completions in-order i.e. no out of order completion descriptor is expected. With UPT, if hardware supports LRO, then hardware can report out of order rx completions. This patch enhances vmxnet3 to add this support. This supports gets effective only when the corresponding feature bit is set. Also, minor enhancements are done for performance. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_drv.c | 68 ++++++++++++++++++++++++++----- drivers/net/vmxnet3/vmxnet3_int.h | 5 +++ 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 93f237db463d..94ca3bc1d540 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -585,6 +585,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, rbi = rbi_base + ring->next2fill; gd = ring->base + ring->next2fill; + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { if (rbi->skb == NULL) { @@ -644,8 +645,10 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, /* Fill the last buffer but dont mark it ready, or else the * device will think that the queue is full */ - if (num_allocated == num_to_alloc) + if (num_allocated == num_to_alloc) { + rbi->comp_state = VMXNET3_RXD_COMP_DONE; break; + } gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT); num_allocated++; @@ -1367,6 +1370,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; + int comp_offset, fill_offset; #ifdef __BIG_ENDIAN_BITFIELD struct Vmxnet3_RxDesc rxCmdDesc; struct Vmxnet3_RxCompDesc rxComp; @@ -1639,9 +1643,15 @@ not_lro: rcd_done: /* device may have skipped some rx descs */ - ring->next2comp = idx; - num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + rbi->comp_state = VMXNET3_RXD_COMP_DONE; + + comp_offset = vmxnet3_cmd_ring_desc_avail(ring); + fill_offset = (idx > ring->next2fill ? 0 : ring->size) + + idx - ring->next2fill - 1; + if (!ring->isOutOfOrder || fill_offset >= comp_offset) + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); /* Ensure that the writes to rxd->gen bits will be observed * after all other writes to rxd objects. @@ -1649,18 +1659,38 @@ rcd_done: dma_wmb(); while (num_to_alloc) { - vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, - &rxCmdDesc); - BUG_ON(!rxd->addr); + rbi = rq->buf_info[ring_idx] + ring->next2fill; + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_OOORX_COMP))) + goto refill_buf; + if (ring_idx == 0) { + /* ring0 Type1 buffers can get skipped; re-fill them */ + if (rbi->buf_type != VMXNET3_RX_BUF_SKB) + goto refill_buf; + } + if (rbi->comp_state == VMXNET3_RXD_COMP_DONE) { +refill_buf: + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc); + WARN_ON(!rxd->addr); - /* Recv desc is ready to be used by the device */ - rxd->gen = ring->gen; - vmxnet3_cmd_ring_adv_next2fill(ring); - num_to_alloc--; + /* Recv desc is ready to be used by the device */ + rxd->gen = ring->gen; + vmxnet3_cmd_ring_adv_next2fill(ring); + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; + num_to_alloc--; + } else { + /* rx completion hasn't occurred */ + ring->isOutOfOrder = 1; + break; + } + } + + if (num_to_alloc == 0) { + ring->isOutOfOrder = 0; } /* if needed, update the register */ - if (unlikely(rq->shared->updateRxProd)) { + if (unlikely(rq->shared->updateRxProd) && (ring->next2fill & 0xf) == 0) { VMXNET3_WRITE_BAR0_REG(adapter, rxprod_reg[ring_idx] + rq->qid * 8, ring->next2fill); @@ -1824,6 +1854,7 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc)); rq->rx_ring[i].gen = VMXNET3_INIT_GEN; + rq->rx_ring[i].isOutOfOrder = 0; } if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, adapter) == 0) { @@ -2014,8 +2045,17 @@ vmxnet3_poll_rx_only(struct napi_struct *napi, int budget) rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget); if (rxd_done < budget) { + struct Vmxnet3_RxCompDesc *rcd; +#ifdef __BIG_ENDIAN_BITFIELD + struct Vmxnet3_RxCompDesc rxComp; +#endif napi_complete_done(napi, rxd_done); vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx); + /* after unmasking the interrupt, check if any descriptors were completed */ + vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, + &rxComp); + if (rcd->gen == rq->comp_ring.gen && napi_reschedule(napi)) + vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx); } return rxd_done; } @@ -3612,6 +3652,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->dev_caps[0] = adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_LARGE_BAR); } + if (!(adapter->ptcap_supported[0] & (1UL << VMXNET3_DCR_ERROR)) && + adapter->ptcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP) && + adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP)) { + adapter->dev_caps[0] |= adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_OOORX_COMP); + } if (adapter->dev_caps[0]) VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index a4f832f0ad5b..5b495ef253e8 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -136,6 +136,7 @@ struct vmxnet3_cmd_ring { u32 next2fill; u32 next2comp; u8 gen; + u8 isOutOfOrder; dma_addr_t basePA; }; @@ -260,9 +261,13 @@ enum vmxnet3_rx_buf_type { VMXNET3_RX_BUF_PAGE = 2 }; +#define VMXNET3_RXD_COMP_PENDING 0 +#define VMXNET3_RXD_COMP_DONE 1 + struct vmxnet3_rx_buf_info { enum vmxnet3_rx_buf_type buf_type; u16 len; + u8 comp_state; union { struct sk_buff *skb; struct page *page; From c7112ebd27ea0dbe4eecd5c96cad93757e34e73d Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:50 -0700 Subject: [PATCH 17/21] vmxnet3: add command to set ring buffer sizes This patch adds a new command to set ring buffer sizes. This is required to pass the buffer size information to passthrough devices. For performance reasons, with version7 and later, ring1 will contain only mtu size buffers (bound to 3K). Packets > 3K will use both ring1 and ring2. Also, ring sizes are round down to power of 2 and ring2 default size is increased to 512. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_defs.h | 11 ++++++ drivers/net/vmxnet3/vmxnet3_drv.c | 55 +++++++++++++++++++++------ drivers/net/vmxnet3/vmxnet3_ethtool.c | 7 ++++ drivers/net/vmxnet3/vmxnet3_int.h | 3 +- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 8d626611ab2d..415e4c9993ef 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -99,6 +99,9 @@ enum { VMXNET3_CMD_SET_COALESCE, VMXNET3_CMD_REGISTER_MEMREGS, VMXNET3_CMD_SET_RSS_FIELDS, + VMXNET3_CMD_RESERVED4, + VMXNET3_CMD_RESERVED5, + VMXNET3_CMD_SET_RING_BUFFER_SIZE, VMXNET3_CMD_FIRST_GET = 0xF00D0000, VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, @@ -743,6 +746,13 @@ enum Vmxnet3_RSSField { VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020, }; +struct Vmxnet3_RingBufferSize { + __le16 ring1BufSizeType0; + __le16 ring1BufSizeType1; + __le16 ring2BufSizeType1; + __le16 pad; +}; + /* If the command data <= 16 bytes, use the shared memory directly. * otherwise, use variable length configuration descriptor. */ @@ -750,6 +760,7 @@ union Vmxnet3_CmdInfo { struct Vmxnet3_VariableLenConfDesc varConf; struct Vmxnet3_SetPolling setPolling; enum Vmxnet3_RSSField setRssFields; + struct Vmxnet3_RingBufferSize ringBufSize; __le64 data[2]; }; diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 94ca3bc1d540..c2f99c01a130 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2680,6 +2680,23 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) /* the rest are already zeroed */ } +static void +vmxnet3_init_bufsize(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_7(adapter)) + return; + + cmdInfo->ringBufSize = adapter->ringBufSize; + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RING_BUFFER_SIZE); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + static void vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter) { @@ -2818,6 +2835,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) goto activate_err; } + vmxnet3_init_bufsize(adapter); vmxnet3_init_coalesce(adapter); vmxnet3_init_rssfields(adapter); @@ -2991,19 +3009,29 @@ static void vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) { size_t sz, i, ring0_size, ring1_size, comp_size; - if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - - VMXNET3_MAX_ETH_HDR_SIZE) { - adapter->skb_buf_size = adapter->netdev->mtu + - VMXNET3_MAX_ETH_HDR_SIZE; - if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) - adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; + /* With version7 ring1 will have only T0 buffers */ + if (!VMXNET3_VERSION_GE_7(adapter)) { + if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - + VMXNET3_MAX_ETH_HDR_SIZE) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) + adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; - adapter->rx_buf_per_pkt = 1; + adapter->rx_buf_per_pkt = 1; + } else { + adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; + sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + } } else { - adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; - sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + - VMXNET3_MAX_ETH_HDR_SIZE; - adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + adapter->skb_buf_size = min((int)adapter->netdev->mtu + VMXNET3_MAX_ETH_HDR_SIZE, + VMXNET3_MAX_SKB_BUF_SIZE); + adapter->rx_buf_per_pkt = 1; + adapter->ringBufSize.ring1BufSizeType0 = cpu_to_le16(adapter->skb_buf_size); + adapter->ringBufSize.ring1BufSizeType1 = 0; + adapter->ringBufSize.ring2BufSizeType1 = cpu_to_le16(PAGE_SIZE); } /* @@ -3019,6 +3047,11 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) ring1_size = (ring1_size + sz - 1) / sz * sz; ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE / sz * sz); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + ring0_size = rounddown_pow_of_two(ring0_size); + ring1_size = rounddown_pow_of_two(ring1_size); + } comp_size = ring0_size + ring1_size; for (i = 0; i < adapter->num_rx_queues; i++) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 397b268f7bc5..ce3993282c0f 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -718,6 +718,13 @@ vmxnet3_set_ringparam(struct net_device *netdev, new_rx_ring2_size = min_t(u32, new_rx_ring2_size, VMXNET3_RX_RING2_MAX_SIZE); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + new_tx_ring_size = rounddown_pow_of_two(new_tx_ring_size); + new_rx_ring_size = rounddown_pow_of_two(new_rx_ring_size); + new_rx_ring2_size = rounddown_pow_of_two(new_rx_ring2_size); + } + /* rx data ring buffer size has to be a multiple of * VMXNET3_RXDATA_DESC_SIZE_ALIGN */ diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5b495ef253e8..cb87731f5f1c 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -408,6 +408,7 @@ struct vmxnet3_adapter { dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; bool queuesExtEnabled; + struct Vmxnet3_RingBufferSize ringBufSize; u32 devcap_supported[8]; u32 ptcap_supported[8]; u32 dev_caps[8]; @@ -449,7 +450,7 @@ struct vmxnet3_adapter { /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 #define VMXNET3_DEF_RX_RING_SIZE 1024 -#define VMXNET3_DEF_RX_RING2_SIZE 256 +#define VMXNET3_DEF_RX_RING2_SIZE 512 #define VMXNET3_DEF_RXDATA_DESC_SIZE 128 From d2857b99a74b082368ee80f359372faa1d051043 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:51 -0700 Subject: [PATCH 18/21] vmxnet3: limit number of TXDs used for TSO packet Currently, vmxnet3 does not have a limit on number of descriptors used for a TSO packet. However, with UPT, for hardware performance reasons, this patch limits the number of transmit descriptors to 24 for a TSO packet. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_defs.h | 2 ++ drivers/net/vmxnet3/vmxnet3_drv.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 415e4c9993ef..cb9dc72f2b3d 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -400,6 +400,8 @@ union Vmxnet3_GenericDesc { /* max # of tx descs for a non-tso pkt */ #define VMXNET3_MAX_TXD_PER_PKT 16 +/* max # of tx descs for a tso pkt */ +#define VMXNET3_MAX_TSO_TXD_PER_PKT 24 /* Max size of a single rx buffer */ #define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c2f99c01a130..2ee3a2e39f10 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1061,6 +1061,23 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + /* tso pkts must not use more than + * VMXNET3_MAX_TSO_TXD_PER_PKT entries + */ + if (skb_linearize(skb) != 0) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + tq->stats.linearized++; + + /* recalculate the # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1; + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + } if (skb->encapsulation) { vmxnet3_prepare_inner_tso(skb, &ctx); } else { From 60cafa0395c2bed44d13277ed328317ed16a58c0 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:52 -0700 Subject: [PATCH 19/21] vmxnet3: use ext1 field to indicate encapsulated packet Till vmxnet3 version 6, om field of transmit descriptor was used to indicate encapsulated offload packet and msscof was used to indirectly indicate TSO/CSO. From version 7 and later, ext1 field will be used to indicate whether packet is encapsulated or not and om fields will continue to indicate if the packet is TSO or CSO. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_defs.h | 14 ++++++++------ drivers/net/vmxnet3/vmxnet3_drv.c | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index cb9dc72f2b3d..41d6767283a6 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -148,17 +148,17 @@ struct Vmxnet3_TxDesc { #ifdef __BIG_ENDIAN_BITFIELD u32 msscof:14; /* MSS, checksum offset, flags */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 dtype:1; /* descriptor type */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 gen:1; /* generation bit */ u32 len:14; #else u32 len:14; u32 gen:1; /* generation bit */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 dtype:1; /* descriptor type */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 msscof:14; /* MSS, checksum offset, flags */ #endif /* __BIG_ENDIAN_BITFIELD */ @@ -262,11 +262,13 @@ struct Vmxnet3_RxCompDesc { u32 rqID:10; /* rx queue/ring ID */ u32 sop:1; /* Start of Packet */ u32 eop:1; /* End of Packet */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 rxdIdx:12; /* Index of the RxDesc */ #else u32 rxdIdx:12; /* Index of the RxDesc */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 eop:1; /* End of Packet */ u32 sop:1; /* Start of Packet */ u32 rqID:10; /* rx queue/ring ID */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2ee3a2e39f10..5c42b4a008e8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1161,7 +1161,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (ctx.mss) { if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_ENCAP; + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + } gdesc->txd.msscof = ctx.mss; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) @@ -1178,8 +1183,15 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_ENCAP; - gdesc->txd.msscof = 0; /* Reserved */ + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = 0; /* Reserved */ + } } else { gdesc->txd.hlen = ctx.l4_offset; gdesc->txd.om = VMXNET3_OM_CSUM; From acc38e041bd304621d4f59cea4849747d13bba9c Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Jun 2022 20:23:53 -0700 Subject: [PATCH 20/21] vmxnet3: update to version 7 With all vmxnet3 version 7 changes incorporated in the vmxnet3 driver, the driver can configure emulation to run at vmxnet3 version 7, provided the emulation advertises support for version 7. Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: Paolo Abeni --- drivers/net/vmxnet3/vmxnet3_drv.c | 7 ++++++- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 5c42b4a008e8..1565e1808a19 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3659,7 +3659,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_6)) { + if (ver & (1 << VMXNET3_REV_7)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_7); + adapter->version = VMXNET3_REV_7 + 1; + } else if (ver & (1 << VMXNET3_REV_6)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_6); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index cb87731f5f1c..3367db23aa13 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,12 +69,12 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.7.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01060000 +#define VMXNET3_DRIVER_VERSION_NUM 0x01070000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ From 263efe85a4b618037e1003c9636562d6cbb5f9f3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 8 Jun 2022 11:08:18 +0300 Subject: [PATCH 21/21] net: macb: change return type for gem_ptp_set_one_step_sync() gem_ptp_set_one_step_sync() always returns zero thus change its return type to void. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220608080818.1495044-1-claudiu.beznea@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_ptp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 9559c16078f9..e6cb20aaa76a 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -434,7 +434,7 @@ int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) return 0; } -static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) +static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) { u32 reg_val; @@ -444,8 +444,6 @@ static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) macb_writel(bp, NCR, reg_val | MACB_BIT(OSSMODE)); else macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); - - return 0; } int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -468,8 +466,7 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) case HWTSTAMP_TX_OFF: break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (gem_ptp_set_one_step_sync(bp, 1) != 0) - return -ERANGE; + gem_ptp_set_one_step_sync(bp, 1); tx_bd_control = TSTAMP_ALL_FRAMES; break; case HWTSTAMP_TX_ON: