diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index c00fb0d22c7b..020337f3c05f 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -410,6 +410,15 @@ PROPERTIES The settings and programming routines for internal/external MDIO are different. Must be included for internal MDIO. +- fsl,erratum-a009885 + Usage: optional + Value type: + Definition: Indicates the presence of the A009885 + erratum describing that the contents of MDIO_DATA may + become corrupt unless it is read within 16 MDC cycles + of MDIO_CFG[BSY] being cleared, when performing an + MDIO read operation. + - fsl,erratum-a011043 Usage: optional Value type: diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt index d7117a22fd87..27db496f1ce8 100644 --- a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt +++ b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt @@ -9,6 +9,9 @@ Required properties on all platforms: - compatible: For the OX820 SoC, it should be : - "oxsemi,ox820-dwmac" to select glue - "snps,dwmac-3.512" to select IP version. + For the OX810SE SoC, it should be : + - "oxsemi,ox810se-dwmac" to select glue + - "snps,dwmac-3.512" to select IP version. - clocks: Should contain phandles to the following clocks - clock-names: Should contain the following: diff --git a/Documentation/devicetree/bindings/pwm/pwm.yaml b/Documentation/devicetree/bindings/pwm/pwm.yaml index 2effe6c0de6b..3c01f85029e5 100644 --- a/Documentation/devicetree/bindings/pwm/pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/pwm.yaml @@ -9,6 +9,8 @@ title: PWM controllers (providers) maintainers: - Thierry Reding +select: false + properties: $nodename: pattern: "^pwm(@.*|-[0-9a-f])*$" diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst index 7d2ef4e27273..4942e018db85 100644 --- a/Documentation/filesystems/ceph.rst +++ b/Documentation/filesystems/ceph.rst @@ -82,7 +82,7 @@ Mount Syntax The basic mount syntax is:: - # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt + # mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]] You only need to specify a single monitor, as the client will get the full list when it connects. (However, if the monitor you specify @@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.) The port can be left off if the monitor is using the default. So if the monitor is at 1.2.3.4:: - # mount -t ceph 1.2.3.4:/ /mnt/ceph + # mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4 is sufficient. If /sbin/mount.ceph is installed, a hostname can be -used instead of an IP address. +used instead of an IP address and the cluster FSID can be left out +(as the mount helper will fill it in by reading the ceph configuration +file):: + # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr +Multiple monitor addresses can be passed by separating each address with a slash (`/`):: + + # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101 + +When using the mount helper, monitor address can be read from ceph +configuration file if available. Note that, the cluster FSID (passed as part +of the device string) is validated by checking it with the FSID reported by +the monitor. Mount Options ============= + mon_addr=ip_address[:port][/ip_address[:port]] + Monitor address to the cluster. This is used to bootstrap the + connection to the cluster. Once connection is established, the + monitor addresses in the monitor map are followed. + + fsid=cluster-id + FSID of the cluster (from `ceph fsid` command). + ip=A.B.C.D[:N] Specify the IP and/or port the client should bind to locally. There is normally not much reason to do this. If the IP is not diff --git a/MAINTAINERS b/MAINTAINERS index 1d86fba5cf4b..5b9fc577601a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16814,6 +16814,7 @@ M: Heiko Carstens M: Vasily Gorbik M: Christian Borntraeger R: Alexander Gordeev +R: Sven Schnelle L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a53..48e5cd61599c 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 0d90cbeb89b4..e3f12db46cfc 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -109,7 +109,9 @@ struct hws_basic_entry { unsigned int AS:2; /* 29-30 PSW address-space control */ unsigned int I:1; /* 31 entry valid or invalid */ unsigned int CL:2; /* 32-33 Configuration Level */ - unsigned int:14; + unsigned int H:1; /* 34 Host Indicator */ + unsigned int LS:1; /* 35 Limited Sampling */ + unsigned int:12; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ unsigned long long gpp; /* Guest Program Parameter */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index ce550d06abc3..147cb3534ce4 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -49,51 +49,85 @@ int __get_user_bad(void) __attribute__((noreturn)); #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES -#define __put_get_user_asm(to, from, size, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - insn " 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %[rc],%[retval]\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \ - : "cc", "0"); \ - __rc; \ +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + +#define __put_get_user_asm(to, from, size, oac_spec) \ +({ \ + int __rc; \ + \ + asm volatile( \ + " lr 0,%[spec]\n" \ + "0: mvcos %[_to],%[_from],%[_size]\n" \ + "1: xr %[rc],%[rc]\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %[rc],%[retval]\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ + : [_size] "d" (size), [_from] "Q" (*(from)), \ + [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ + : "cc", "0"); \ + __rc; \ }) +#define __put_user_asm(to, from, size) \ + __put_get_user_asm(to, from, size, ((union oac) { \ + .oac1.as = PSW_BITS_AS_SECONDARY, \ + .oac1.a = 1 \ + })) + +#define __get_user_asm(to, from, size) \ + __put_get_user_asm(to, from, size, ((union oac) { \ + .oac2.as = PSW_BITS_AS_SECONDARY, \ + .oac2.a = 1 \ + })) \ + static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { int rc; switch (size) { case 1: - rc = __put_get_user_asm((unsigned char __user *)ptr, - (unsigned char *)x, - size, "llilh"); + rc = __put_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size); break; case 2: - rc = __put_get_user_asm((unsigned short __user *)ptr, - (unsigned short *)x, - size, "llilh"); + rc = __put_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size); break; case 4: - rc = __put_get_user_asm((unsigned int __user *)ptr, - (unsigned int *)x, - size, "llilh"); + rc = __put_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size); break; case 8: - rc = __put_get_user_asm((unsigned long __user *)ptr, - (unsigned long *)x, - size, "llilh"); + rc = __put_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size); break; default: __put_user_bad(); @@ -108,24 +142,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign switch (size) { case 1: - rc = __put_get_user_asm((unsigned char *)x, - (unsigned char __user *)ptr, - size, "lghi"); + rc = __get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size); break; case 2: - rc = __put_get_user_asm((unsigned short *)x, - (unsigned short __user *)ptr, - size, "lghi"); + rc = __get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size); break; case 4: - rc = __put_get_user_asm((unsigned int *)x, - (unsigned int __user *)ptr, - size, "lghi"); + rc = __get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size); break; case 8: - rc = __put_get_user_asm((unsigned long *)x, - (unsigned long __user *)ptr, - size, "lghi"); + rc = __get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size); break; default: __get_user_bad(); diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c index 30f0242de4a5..8ee48672233f 100644 --- a/arch/s390/kernel/perf_cpum_cf_common.c +++ b/arch/s390/kernel/perf_cpum_cf_common.c @@ -178,7 +178,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, case CPUMF_CTR_SET_CRYPTO: if (info->csvn >= 1 && info->csvn <= 5) ctrset_size = 16; - else if (info->csvn == 6) + else if (info->csvn == 6 || info->csvn == 7) ctrset_size = 20; break; case CPUMF_CTR_SET_EXT: @@ -188,7 +188,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, ctrset_size = 48; else if (info->csvn >= 3 && info->csvn <= 5) ctrset_size = 128; - else if (info->csvn == 6) + else if (info->csvn == 6 || info->csvn == 7) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 37265f551a11..52c1fe23b823 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -344,7 +344,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -715,8 +715,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 1 ... 5: csvn = cpumcf_svn_12345_pmu_event_attr; break; - case 6: - csvn = cpumcf_svn_6_pmu_event_attr; + case 6 ... 7: + csvn = cpumcf_svn_67_pmu_event_attr; break; default: csvn = none; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index db62def4ef28..332a49965130 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1179,7 +1179,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, sample = (struct hws_basic_entry *) *sdbt; while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->def) + if (!sample->def || sample->LS) break; /* Update perf event period */ diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index a596e69d3c47..8a5d21461889 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -62,10 +62,14 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr unsigned long size) { unsigned long tmp1, tmp2; + union oac spec = { + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.a = 1, + }; tmp1 = -4096UL; asm volatile( - " lghi 0,%[spec]\n" + " lr 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" "6: jz 4f\n" "1: algr %0,%3\n" @@ -84,7 +88,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [spec] "K" (0x81UL) + : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } @@ -135,10 +139,14 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; + union oac spec = { + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.a = 1, + }; tmp1 = -4096UL; asm volatile( - " llilh 0,%[spec]\n" + " lr 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" "6: jz 4f\n" "1: algr %0,%3\n" @@ -157,7 +165,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [spec] "K" (0x81UL) + : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } @@ -207,10 +215,14 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; + union oac spec = { + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.a = 1, + }; tmp1 = -4096UL; asm volatile( - " llilh 0,%[spec]\n" + " lr 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" " jz 4f\n" "1: algr %0,%2\n" @@ -228,7 +240,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) - : "a" (empty_zero_page), [spec] "K" (0x81UL) + : "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index bc8e8d9f176b..3e726ee91fdc 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -178,7 +178,6 @@ static void ia_hack_tcq(IADEV *dev) { static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) { u_short desc_num, i; - struct sk_buff *skb; struct ia_vcc *iavcc_r = NULL; unsigned long delta; static unsigned long timer = 0; @@ -202,8 +201,7 @@ static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) { else dev->ffL.tcq_rd -= 2; *(u_short *)(dev->seg_ram + dev->ffL.tcq_rd) = i+1; - if (!(skb = dev->desc_tbl[i].txskb) || - !(iavcc_r = dev->desc_tbl[i].iavcc)) + if (!dev->desc_tbl[i].txskb || !(iavcc_r = dev->desc_tbl[i].iavcc)) printk("Fatal err, desc table vcc or skb is NULL\n"); else iavcc_r->vc_desc_cnt--; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8f140da1efe3..4203cdab8abf 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf) * These are the characters that produce nonzero for * isspace() in the "C" and "POSIX" locales. */ - const char *spaces = " \f\n\r\t\v"; + static const char spaces[] = " \f\n\r\t\v"; *buf += strspn(*buf, spaces); /* Find start of token */ @@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf, pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; pctx.opts->trim = RBD_TRIM_DEFAULT; - ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL); + ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL, + ','); if (ret) goto out_err; diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c index 50003ad2e589..52b8b72ded77 100644 --- a/drivers/gpio/gpio-idt3243x.c +++ b/drivers/gpio/gpio-idt3243x.c @@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct gpio_irq_chip *girq; struct idt_gpio_ctrl *ctrl; - unsigned int parent_irq; + int parent_irq; int ngpios; int ret; @@ -164,8 +164,8 @@ static int idt_gpio_probe(struct platform_device *pdev) return PTR_ERR(ctrl->pic); parent_irq = platform_get_irq(pdev, 0); - if (!parent_irq) - return -EINVAL; + if (parent_irq < 0) + return parent_irq; girq = &ctrl->gc.irq; girq->chip = &idt_gpio_irqchip; diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 70d6ae20b1da..a964e25ea620 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -47,7 +47,7 @@ struct mpc8xxx_gpio_chip { unsigned offset, int value); struct irq_domain *irq; - unsigned int irqn; + int irqn; }; /* @@ -388,8 +388,8 @@ static int mpc8xxx_probe(struct platform_device *pdev) } mpc8xxx_gc->irqn = platform_get_irq(pdev, 0); - if (!mpc8xxx_gc->irqn) - return 0; + if (mpc8xxx_gc->irqn < 0) + return mpc8xxx_gc->irqn; mpc8xxx_gc->irq = irq_domain_create_linear(fwnode, MPC8XXX_GPIO_PINS, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 07fc603c2fa7..ec498ce70f35 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3874,8 +3874,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) skb->l4_hash) return skb->hash; - return __bond_xmit_hash(bond, skb, skb->head, skb->protocol, - skb->mac_header, skb->network_header, + return __bond_xmit_hash(bond, skb, skb->data, skb->protocol, + skb_mac_offset(skb), skb_network_offset(skb), skb_headlen(skb)); } @@ -4884,25 +4884,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; struct list_head *iter; + bool xmit_suc = false; + bool skb_used = false; bond_for_each_slave_rcu(bond, slave, iter) { - if (bond_is_last_slave(bond, slave)) - break; - if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2; + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; + + if (bond_is_last_slave(bond, slave)) { + skb2 = skb; + skb_used = true; + } else { + skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) { net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", bond_dev->name, __func__); continue; } - bond_dev_queue_xmit(bond, skb2, slave->dev); } - } - if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) - return bond_dev_queue_xmit(bond, skb, slave->dev); - return bond_tx_drop(bond_dev, skb); + if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK) + xmit_suc = true; + } + + if (!skb_used) + dev_kfree_skb_any(skb); + + if (xmit_suc) + return NETDEV_TX_OK; + + atomic_long_inc(&bond_dev->tx_dropped); + return NET_XMIT_DROP; } /*------------------------- Device initialization ---------------------------*/ diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 849de4564709..621ce742ad21 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -106,9 +106,9 @@ static void emac_update_speed(struct net_device *dev) /* set EMAC SPEED, depend on PHY */ reg_val = readl(db->membase + EMAC_MAC_SUPP_REG); - reg_val &= ~(0x1 << 8); + reg_val &= ~EMAC_MAC_SUPP_100M; if (db->speed == SPEED_100) - reg_val |= 1 << 8; + reg_val |= EMAC_MAC_SUPP_100M; writel(reg_val, db->membase + EMAC_MAC_SUPP_REG); } @@ -264,7 +264,7 @@ static void emac_dma_done_callback(void *arg) /* re enable interrupt */ reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0x01 << 8); + reg_val |= EMAC_INT_CTL_RX_EN; writel(reg_val, db->membase + EMAC_INT_CTL_REG); db->emacrx_completed_flag = 1; @@ -429,7 +429,7 @@ static unsigned int emac_powerup(struct net_device *ndev) /* initial EMAC */ /* flush RX FIFO */ reg_val = readl(db->membase + EMAC_RX_CTL_REG); - reg_val |= 0x8; + reg_val |= EMAC_RX_CTL_FLUSH_FIFO; writel(reg_val, db->membase + EMAC_RX_CTL_REG); udelay(1); @@ -441,8 +441,8 @@ static unsigned int emac_powerup(struct net_device *ndev) /* set MII clock */ reg_val = readl(db->membase + EMAC_MAC_MCFG_REG); - reg_val &= (~(0xf << 2)); - reg_val |= (0xD << 2); + reg_val &= ~EMAC_MAC_MCFG_MII_CLKD_MASK; + reg_val |= EMAC_MAC_MCFG_MII_CLKD_72; writel(reg_val, db->membase + EMAC_MAC_MCFG_REG); /* clear RX counter */ @@ -506,7 +506,7 @@ static void emac_init_device(struct net_device *dev) /* enable RX/TX0/RX Hlevel interrup */ reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0xf << 0) | (0x01 << 8); + reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN); writel(reg_val, db->membase + EMAC_INT_CTL_REG); spin_unlock_irqrestore(&db->lock, flags); @@ -637,7 +637,9 @@ static void emac_rx(struct net_device *dev) if (!rxcount) { db->emacrx_completed_flag = 1; reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0xf << 0) | (0x01 << 8); + reg_val |= (EMAC_INT_CTL_TX_EN | + EMAC_INT_CTL_TX_ABRT_EN | + EMAC_INT_CTL_RX_EN); writel(reg_val, db->membase + EMAC_INT_CTL_REG); /* had one stuck? */ @@ -669,7 +671,9 @@ static void emac_rx(struct net_device *dev) writel(reg_val | EMAC_CTL_RX_EN, db->membase + EMAC_CTL_REG); reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0xf << 0) | (0x01 << 8); + reg_val |= (EMAC_INT_CTL_TX_EN | + EMAC_INT_CTL_TX_ABRT_EN | + EMAC_INT_CTL_RX_EN); writel(reg_val, db->membase + EMAC_INT_CTL_REG); db->emacrx_completed_flag = 1; @@ -783,20 +787,20 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id) } /* Transmit Interrupt check */ - if (int_status & (0x01 | 0x02)) + if (int_status & EMAC_INT_STA_TX_COMPLETE) emac_tx_done(dev, db, int_status); - if (int_status & (0x04 | 0x08)) + if (int_status & EMAC_INT_STA_TX_ABRT) netdev_info(dev, " ab : %x\n", int_status); /* Re-enable interrupt mask */ if (db->emacrx_completed_flag == 1) { reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0xf << 0) | (0x01 << 8); + reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN); writel(reg_val, db->membase + EMAC_INT_CTL_REG); } else { reg_val = readl(db->membase + EMAC_INT_CTL_REG); - reg_val |= (0xf << 0); + reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN); writel(reg_val, db->membase + EMAC_INT_CTL_REG); } @@ -1068,6 +1072,7 @@ out_clk_disable_unprepare: clk_disable_unprepare(db->clk); out_dispose_mapping: irq_dispose_mapping(ndev->irq); + dma_release_channel(db->rx_chan); out_iounmap: iounmap(db->membase); out: diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.h b/drivers/net/ethernet/allwinner/sun4i-emac.h index 38c72d9ec600..90bd9ad77607 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.h +++ b/drivers/net/ethernet/allwinner/sun4i-emac.h @@ -38,6 +38,7 @@ #define EMAC_RX_CTL_REG (0x3c) #define EMAC_RX_CTL_AUTO_DRQ_EN (1 << 1) #define EMAC_RX_CTL_DMA_EN (1 << 2) +#define EMAC_RX_CTL_FLUSH_FIFO (1 << 3) #define EMAC_RX_CTL_PASS_ALL_EN (1 << 4) #define EMAC_RX_CTL_PASS_CTL_EN (1 << 5) #define EMAC_RX_CTL_PASS_CRC_ERR_EN (1 << 6) @@ -61,7 +62,21 @@ #define EMAC_RX_IO_DATA_STATUS_OK (1 << 7) #define EMAC_RX_FBC_REG (0x50) #define EMAC_INT_CTL_REG (0x54) +#define EMAC_INT_CTL_RX_EN (1 << 8) +#define EMAC_INT_CTL_TX0_EN (1) +#define EMAC_INT_CTL_TX1_EN (1 << 1) +#define EMAC_INT_CTL_TX_EN (EMAC_INT_CTL_TX0_EN | EMAC_INT_CTL_TX1_EN) +#define EMAC_INT_CTL_TX0_ABRT_EN (0x1 << 2) +#define EMAC_INT_CTL_TX1_ABRT_EN (0x1 << 3) +#define EMAC_INT_CTL_TX_ABRT_EN (EMAC_INT_CTL_TX0_ABRT_EN | EMAC_INT_CTL_TX1_ABRT_EN) #define EMAC_INT_STA_REG (0x58) +#define EMAC_INT_STA_TX0_COMPLETE (0x1) +#define EMAC_INT_STA_TX1_COMPLETE (0x1 << 1) +#define EMAC_INT_STA_TX_COMPLETE (EMAC_INT_STA_TX0_COMPLETE | EMAC_INT_STA_TX1_COMPLETE) +#define EMAC_INT_STA_TX0_ABRT (0x1 << 2) +#define EMAC_INT_STA_TX1_ABRT (0x1 << 3) +#define EMAC_INT_STA_TX_ABRT (EMAC_INT_STA_TX0_ABRT | EMAC_INT_STA_TX1_ABRT) +#define EMAC_INT_STA_RX_COMPLETE (0x1 << 8) #define EMAC_MAC_CTL0_REG (0x5c) #define EMAC_MAC_CTL0_RX_FLOW_CTL_EN (1 << 2) #define EMAC_MAC_CTL0_TX_FLOW_CTL_EN (1 << 3) @@ -87,8 +102,11 @@ #define EMAC_MAC_CLRT_RM (0x0f) #define EMAC_MAC_MAXF_REG (0x70) #define EMAC_MAC_SUPP_REG (0x74) +#define EMAC_MAC_SUPP_100M (0x1 << 8) #define EMAC_MAC_TEST_REG (0x78) #define EMAC_MAC_MCFG_REG (0x7c) +#define EMAC_MAC_MCFG_MII_CLKD_MASK (0xff << 2) +#define EMAC_MAC_MCFG_MII_CLKD_72 (0x0d << 2) #define EMAC_MAC_A0_REG (0x98) #define EMAC_MAC_A1_REG (0x9c) #define EMAC_MAC_A2_REG (0xa0) diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 9a650d1c1bdd..4d2ba30c2fbd 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -1237,6 +1237,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) struct bmac_data *bp; const unsigned char *prop_addr; unsigned char addr[6]; + u8 macaddr[6]; struct net_device *dev; int is_bmac_plus = ((int)match->data) != 0; @@ -1284,7 +1285,9 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) rev = addr[0] == 0 && addr[1] == 0xA0; for (j = 0; j < 6; ++j) - dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j]; + macaddr[j] = rev ? bitrev8(addr[j]): addr[j]; + + eth_hw_addr_set(dev, macaddr); /* Enable chip without interrupts for now */ bmac_enable_and_reset_chip(dev); diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 4b80e3a52a19..6f8c91eb1263 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -90,7 +90,7 @@ static void mace_set_timeout(struct net_device *dev); static void mace_tx_timeout(struct timer_list *t); static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma); static inline void mace_clean_rings(struct mace_data *mp); -static void __mace_set_address(struct net_device *dev, void *addr); +static void __mace_set_address(struct net_device *dev, const void *addr); /* * If we can't get a skbuff when we need it, we use this area for DMA. @@ -112,6 +112,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) struct net_device *dev; struct mace_data *mp; const unsigned char *addr; + u8 macaddr[ETH_ALEN]; int j, rev, rc = -EBUSY; if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) { @@ -167,8 +168,9 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) rev = addr[0] == 0 && addr[1] == 0xA0; for (j = 0; j < 6; ++j) { - dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j]; + macaddr[j] = rev ? bitrev8(addr[j]): addr[j]; } + eth_hw_addr_set(dev, macaddr); mp->chipid = (in_8(&mp->mace->chipid_hi) << 8) | in_8(&mp->mace->chipid_lo); @@ -369,11 +371,12 @@ static void mace_reset(struct net_device *dev) out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO); } -static void __mace_set_address(struct net_device *dev, void *addr) +static void __mace_set_address(struct net_device *dev, const void *addr) { struct mace_data *mp = netdev_priv(dev); volatile struct mace __iomem *mb = mp->mace; - unsigned char *p = addr; + const unsigned char *p = addr; + u8 macaddr[ETH_ALEN]; int i; /* load up the hardware address */ @@ -385,7 +388,10 @@ static void __mace_set_address(struct net_device *dev, void *addr) ; } for (i = 0; i < 6; ++i) - out_8(&mb->padr, dev->dev_addr[i] = p[i]); + out_8(&mb->padr, macaddr[i] = p[i]); + + eth_hw_addr_set(dev, macaddr); + if (mp->chipid != BROKEN_ADDRCHG_REV) out_8(&mb->iac, 0); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 226f4403cfed..87f1056e29ff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4020,10 +4020,12 @@ static int bcmgenet_probe(struct platform_device *pdev) /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; - err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0, - dev->name, priv); - if (!err) - device_set_wakeup_capable(&pdev->dev, 1); + if (priv->wol_irq > 0) { + err = devm_request_irq(&pdev->dev, priv->wol_irq, + bcmgenet_wol_isr, 0, dev->name, priv); + if (!err) + device_set_wakeup_capable(&pdev->dev, 1); + } /* Set the needed headroom to account for any possible * features enabling/disabling at runtime diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d04a6c163445..da8d10475a08 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, - tos, 0); + tos & ~INET_ECN_MASK, 0); if (IS_ERR(rt)) return NULL; n = dst_neigh_lookup(&rt->dst, &peer_ip); diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 5b8b9bcf41a2..266e562bd67a 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -51,6 +51,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a009885; bool has_a011043; }; @@ -186,10 +187,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + unsigned long flags; uint16_t dev_addr; uint32_t mdio_stat; uint32_t mdio_ctl; - uint16_t value; int ret; bool endian = priv->is_little_endian; @@ -221,12 +222,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; } + if (priv->has_a009885) + /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we + * must read back the data register within 16 MDC cycles. + */ + local_irq_save(flags); + /* Initiate the read */ xgmac_write32(mdio_ctl | MDIO_CTL_READ, ®s->mdio_ctl, endian); ret = xgmac_wait_until_done(&bus->dev, regs, endian); if (ret) - return ret; + goto irq_restore; /* Return all Fs if nothing was there */ if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && @@ -234,13 +241,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); - return 0xffff; + ret = 0xffff; + } else { + ret = xgmac_read32(®s->mdio_data, endian) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", ret); } - value = xgmac_read32(®s->mdio_data, endian) & 0xffff; - dev_dbg(&bus->dev, "read %04x\n", value); +irq_restore: + if (priv->has_a009885) + local_irq_restore(flags); - return value; + return ret; } static int xgmac_mdio_probe(struct platform_device *pdev) @@ -287,6 +298,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = device_property_read_bool(&pdev->dev, "little-endian"); + priv->has_a009885 = device_property_read_bool(&pdev->dev, + "fsl,erratum-a009885"); priv->has_a011043 = device_property_read_bool(&pdev->dev, "fsl,erratum-a011043"); @@ -318,9 +331,10 @@ err_ioremap: static int xgmac_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct mdio_fsl_priv *priv = bus->priv; mdiobus_unregister(bus); - iounmap(bus->priv); + iounmap(priv->mdio_base); mdiobus_free(bus); return 0; diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 27937c5d7956..daec9ce04531 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev) netdevice->dev_addr[5] = readb(eth_addr + 0x06); iounmap(eth_addr); - if (!netdevice->irq) { + if (netdevice->irq < 0) { printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n", __FILE__, netdevice->base_addr); + retval = netdevice->irq; goto probe_failed; } diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h index a0a5a8e6bd8c..2fd9ef2fe5d6 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera.h +++ b/drivers/net/ethernet/marvell/prestera/prestera.h @@ -283,7 +283,6 @@ struct prestera_router { struct list_head rif_entry_list; struct notifier_block inetaddr_nb; struct notifier_block inetaddr_valid_nb; - bool aborted; }; struct prestera_rxtx_params { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index 51fc841b1e7a..e6bfadc874c5 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -1831,8 +1831,8 @@ static int prestera_iface_to_msg(struct prestera_iface *iface, int prestera_hw_rif_create(struct prestera_switch *sw, struct prestera_iface *iif, u8 *mac, u16 *rif_id) { - struct prestera_msg_rif_req req; struct prestera_msg_rif_resp resp; + struct prestera_msg_rif_req req; int err; memcpy(req.mac, mac, ETH_ALEN); @@ -1868,9 +1868,9 @@ int prestera_hw_rif_delete(struct prestera_switch *sw, u16 rif_id, int prestera_hw_vr_create(struct prestera_switch *sw, u16 *vr_id) { - int err; struct prestera_msg_vr_resp resp; struct prestera_msg_vr_req req; + int err; err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ROUTER_VR_CREATE, &req.cmd, sizeof(req), &resp.ret, sizeof(resp)); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 08fdd1e50388..cad93f747d0c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -982,6 +982,7 @@ static void prestera_switch_fini(struct prestera_switch *sw) prestera_event_handlers_unregister(sw); prestera_rxtx_switch_fini(sw); prestera_switchdev_fini(sw); + prestera_router_fini(sw); prestera_netdev_event_handler_unregister(sw); prestera_hw_switch_fini(sw); } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 8a3b7b664358..6ef4d32b8fdd 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -25,10 +25,10 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev, struct netlink_ext_ack *extack) { struct prestera_port *port = netdev_priv(port_dev); - int err; - struct prestera_rif_entry *re; struct prestera_rif_entry_key re_key = {}; + struct prestera_rif_entry *re; u32 kern_tb_id; + int err; err = prestera_is_valid_mac_addr(port, port_dev->dev_addr); if (err) { @@ -45,21 +45,21 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev, switch (event) { case NETDEV_UP: if (re) { - NL_SET_ERR_MSG_MOD(extack, "rif_entry already exist"); + NL_SET_ERR_MSG_MOD(extack, "RIF already exist"); return -EEXIST; } re = prestera_rif_entry_create(port->sw, &re_key, prestera_fix_tb_id(kern_tb_id), port_dev->dev_addr); if (!re) { - NL_SET_ERR_MSG_MOD(extack, "Can't create rif_entry"); + NL_SET_ERR_MSG_MOD(extack, "Can't create RIF"); return -EINVAL; } dev_hold(port_dev); break; case NETDEV_DOWN: if (!re) { - NL_SET_ERR_MSG_MOD(extack, "rif_entry not exist"); + NL_SET_ERR_MSG_MOD(extack, "Can't find RIF"); return -EEXIST; } prestera_rif_entry_destroy(port->sw, re); @@ -75,11 +75,11 @@ static int __prestera_inetaddr_event(struct prestera_switch *sw, unsigned long event, struct netlink_ext_ack *extack) { - if (prestera_netdev_check(dev) && !netif_is_bridge_port(dev) && - !netif_is_lag_port(dev) && !netif_is_ovs_port(dev)) - return __prestera_inetaddr_port_event(dev, event, extack); + if (!prestera_netdev_check(dev) || netif_is_bridge_port(dev) || + netif_is_lag_port(dev) || netif_is_ovs_port(dev)) + return 0; - return 0; + return __prestera_inetaddr_port_event(dev, event, extack); } static int __prestera_inetaddr_cb(struct notifier_block *nb, @@ -126,6 +126,8 @@ static int __prestera_inetaddr_valid_cb(struct notifier_block *nb, goto out; if (ipv4_is_multicast(ivi->ivi_addr)) { + NL_SET_ERR_MSG_MOD(ivi->extack, + "Multicast addr on RIF is not supported"); err = -EINVAL; goto out; } @@ -166,7 +168,7 @@ int prestera_router_init(struct prestera_switch *sw) err_register_inetaddr_notifier: unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); err_register_inetaddr_validator_notifier: - /* prestera_router_hw_fini */ + prestera_router_hw_fini(sw); err_router_lib_init: kfree(sw->router); return err; @@ -176,7 +178,7 @@ void prestera_router_fini(struct prestera_switch *sw) { unregister_inetaddr_notifier(&sw->router->inetaddr_nb); unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb); - /* router_hw_fini */ + prestera_router_hw_fini(sw); kfree(sw->router); sw->router = NULL; } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index 5866a4be50f5..e5592b69ad37 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -29,6 +29,12 @@ int prestera_router_hw_init(struct prestera_switch *sw) return 0; } +void prestera_router_hw_fini(struct prestera_switch *sw) +{ + WARN_ON(!list_empty(&sw->router->vr_list)); + WARN_ON(!list_empty(&sw->router->rif_entry_list)); +} + static struct prestera_vr *__prestera_vr_find(struct prestera_switch *sw, u32 tb_id) { @@ -47,13 +53,8 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw, struct netlink_ext_ack *extack) { struct prestera_vr *vr; - u16 hw_vr_id; int err; - err = prestera_hw_vr_create(sw, &hw_vr_id); - if (err) - return ERR_PTR(-ENOMEM); - vr = kzalloc(sizeof(*vr), GFP_KERNEL); if (!vr) { err = -ENOMEM; @@ -61,23 +62,26 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw, } vr->tb_id = tb_id; - vr->hw_vr_id = hw_vr_id; + + err = prestera_hw_vr_create(sw, &vr->hw_vr_id); + if (err) + goto err_hw_create; list_add(&vr->router_node, &sw->router->vr_list); return vr; -err_alloc_vr: - prestera_hw_vr_delete(sw, hw_vr_id); +err_hw_create: kfree(vr); +err_alloc_vr: return ERR_PTR(err); } static void __prestera_vr_destroy(struct prestera_switch *sw, struct prestera_vr *vr) { - prestera_hw_vr_delete(sw, vr->hw_vr_id); list_del(&vr->router_node); + prestera_hw_vr_delete(sw, vr->hw_vr_id); kfree(vr); } @@ -87,17 +91,22 @@ static struct prestera_vr *prestera_vr_get(struct prestera_switch *sw, u32 tb_id struct prestera_vr *vr; vr = __prestera_vr_find(sw, tb_id); - if (!vr) + if (vr) { + refcount_inc(&vr->refcount); + } else { vr = __prestera_vr_create(sw, tb_id, extack); - if (IS_ERR(vr)) - return ERR_CAST(vr); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + refcount_set(&vr->refcount, 1); + } return vr; } static void prestera_vr_put(struct prestera_switch *sw, struct prestera_vr *vr) { - if (!vr->ref_cnt) + if (refcount_dec_and_test(&vr->refcount)) __prestera_vr_destroy(sw, vr); } @@ -120,7 +129,7 @@ __prestera_rif_entry_key_copy(const struct prestera_rif_entry_key *in, out->iface.vlan_id = in->iface.vlan_id; break; default: - pr_err("Unsupported iface type"); + WARN(1, "Unsupported iface type"); return -EINVAL; } @@ -158,7 +167,6 @@ void prestera_rif_entry_destroy(struct prestera_switch *sw, iface.vr_id = e->vr->hw_vr_id; prestera_hw_rif_delete(sw, e->hw_id, &iface); - e->vr->ref_cnt--; prestera_vr_put(sw, e->vr); kfree(e); } @@ -183,7 +191,6 @@ prestera_rif_entry_create(struct prestera_switch *sw, if (IS_ERR(e->vr)) goto err_vr_get; - e->vr->ref_cnt++; memcpy(&e->addr, addr, sizeof(e->addr)); /* HW */ @@ -198,7 +205,6 @@ prestera_rif_entry_create(struct prestera_switch *sw, return e; err_hw_create: - e->vr->ref_cnt--; prestera_vr_put(sw, e->vr); err_vr_get: err_key_copy: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h index fed53595f7bb..b6b028551868 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h @@ -6,7 +6,7 @@ struct prestera_vr { struct list_head router_node; - unsigned int ref_cnt; + refcount_t refcount; u32 tb_id; /* key (kernel fib table id) */ u16 hw_vr_id; /* virtual router ID */ u8 __pad[2]; @@ -32,5 +32,6 @@ prestera_rif_entry_create(struct prestera_switch *sw, struct prestera_rif_entry_key *k, u32 tb_id, const unsigned char *addr); int prestera_router_hw_init(struct prestera_switch *sw); +void prestera_router_hw_fini(struct prestera_switch *sw); #endif /* _PRESTERA_ROUTER_HW_H_ */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b67b4323cff0..f02d07ec5ccb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -267,7 +267,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, phylink_config); struct mtk_eth *eth = mac->hw; u32 mcr_cur, mcr_new, sid, i; - int val, ge_mode, err; + int val, ge_mode, err = 0; /* MT76x8 has no hardware settings between for the MAC */ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 33815246fead..378fc8e3bd97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* Copyright (c) 2018 Mellanox Technologies. */ +#include #include #include #include @@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; @@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index b1311b656e17..455293aa6343 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -771,7 +771,10 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port); - ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause); + /* Don't attempt to send PAUSE frames on the NPI port, it's broken */ + if (port != ocelot->npi) + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, + tx_pause); /* Undo the effects of ocelot_phylink_mac_link_down: * enable MAC module diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index beb9379424c0..949858891973 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -559,13 +559,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } - if (filter->block_id == VCAP_IS1 && - !is_zero_ether_addr(match.mask->dst)) { - NL_SET_ERR_MSG_MOD(extack, - "Key type S1_NORMAL cannot match on destination MAC"); - return -EOPNOTSUPP; - } - /* The hw support mac matches only for MAC_ETYPE key, * therefore if other matches(port, tcp flags, etc) are added * then just bail out @@ -580,6 +573,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; flow_rule_match_eth_addrs(rule, &match); + + if (filter->block_id == VCAP_IS1 && + !is_zero_ether_addr(match.mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, + "Key type S1_NORMAL cannot match on destination MAC"); + return -EOPNOTSUPP; + } + filter->key_type = OCELOT_VCAP_KEY_ETYPE; ether_addr_copy(filter->key.etype.dmac.value, match.key->dst); @@ -805,13 +806,34 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, struct netlink_ext_ack *extack = f->common.extack; struct ocelot_vcap_filter *filter; int chain = f->common.chain_index; - int ret; + int block_id, ret; if (chain && !ocelot_find_vcap_filter_that_points_at(ocelot, chain)) { NL_SET_ERR_MSG_MOD(extack, "No default GOTO action points to this chain"); return -EOPNOTSUPP; } + block_id = ocelot_chain_to_block(chain, ingress); + if (block_id < 0) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload to this chain"); + return -EOPNOTSUPP; + } + + filter = ocelot_vcap_block_find_filter_by_id(&ocelot->block[block_id], + f->cookie, true); + if (filter) { + /* Filter already exists on other ports */ + if (!ingress) { + NL_SET_ERR_MSG_MOD(extack, "VCAP ES0 does not support shared filters"); + return -EOPNOTSUPP; + } + + filter->ingress_port_mask |= BIT(port); + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter didn't exist, create it now */ filter = ocelot_vcap_filter_create(ocelot, port, ingress, f); if (!filter) return -ENOMEM; @@ -874,6 +896,12 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, if (filter->type == OCELOT_VCAP_FILTER_DUMMY) return ocelot_vcap_dummy_filter_del(ocelot, filter); + if (ingress) { + filter->ingress_port_mask &= ~BIT(port); + if (filter->ingress_port_mask) + return ocelot_vcap_filter_replace(ocelot, filter); + } + return ocelot_vcap_filter_del(ocelot, filter); } EXPORT_SYMBOL_GPL(ocelot_cls_flower_destroy); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 8115c3db252e..e271b6225b72 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1187,7 +1187,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, ocelot_port_bridge_join(ocelot, port, bridge); err = switchdev_bridge_port_offload(brport_dev, dev, priv, - &ocelot_netdevice_nb, + &ocelot_switchdev_nb, &ocelot_switchdev_blocking_nb, false, extack); if (err) @@ -1201,7 +1201,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, err_switchdev_sync: switchdev_bridge_port_unoffload(brport_dev, priv, - &ocelot_netdevice_nb, + &ocelot_switchdev_nb, &ocelot_switchdev_blocking_nb); err_switchdev_offload: ocelot_port_bridge_leave(ocelot, port, bridge); @@ -1214,7 +1214,7 @@ static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev, struct ocelot_port_private *priv = netdev_priv(dev); switchdev_bridge_port_unoffload(brport_dev, priv, - &ocelot_netdevice_nb, + &ocelot_switchdev_nb, &ocelot_switchdev_blocking_nb); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index adfeb8d3293d..62a69a91ab22 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -48,16 +49,75 @@ #define DWMAC_RX_VARDELAY(d) ((d) << DWMAC_RX_VARDELAY_SHIFT) #define DWMAC_RXN_VARDELAY(d) ((d) << DWMAC_RXN_VARDELAY_SHIFT) +struct oxnas_dwmac; + +struct oxnas_dwmac_data { + int (*setup)(struct oxnas_dwmac *dwmac); +}; + struct oxnas_dwmac { struct device *dev; struct clk *clk; struct regmap *regmap; + const struct oxnas_dwmac_data *data; }; +static int oxnas_dwmac_setup_ox810se(struct oxnas_dwmac *dwmac) +{ + unsigned int value; + int ret; + + ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value); + if (ret < 0) + return ret; + + /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */ + value |= BIT(DWMAC_CKEN_GTX) | + /* Use simple mux for 25/125 Mhz clock switching */ + BIT(DWMAC_SIMPLE_MUX); + + regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value); + + return 0; +} + +static int oxnas_dwmac_setup_ox820(struct oxnas_dwmac *dwmac) +{ + unsigned int value; + int ret; + + ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value); + if (ret < 0) + return ret; + + /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */ + value |= BIT(DWMAC_CKEN_GTX) | + /* Use simple mux for 25/125 Mhz clock switching */ + BIT(DWMAC_SIMPLE_MUX) | + /* set auto switch tx clock source */ + BIT(DWMAC_AUTO_TX_SOURCE) | + /* enable tx & rx vardelay */ + BIT(DWMAC_CKEN_TX_OUT) | + BIT(DWMAC_CKEN_TXN_OUT) | + BIT(DWMAC_CKEN_TX_IN) | + BIT(DWMAC_CKEN_RX_OUT) | + BIT(DWMAC_CKEN_RXN_OUT) | + BIT(DWMAC_CKEN_RX_IN); + regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value); + + /* set tx & rx vardelay */ + value = DWMAC_TX_VARDELAY(4) | + DWMAC_TXN_VARDELAY(2) | + DWMAC_RX_VARDELAY(10) | + DWMAC_RXN_VARDELAY(8); + regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value); + + return 0; +} + static int oxnas_dwmac_init(struct platform_device *pdev, void *priv) { struct oxnas_dwmac *dwmac = priv; - unsigned int value; int ret; /* Reset HW here before changing the glue configuration */ @@ -69,35 +129,11 @@ static int oxnas_dwmac_init(struct platform_device *pdev, void *priv) if (ret) return ret; - ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value); - if (ret < 0) { + ret = dwmac->data->setup(dwmac); + if (ret) clk_disable_unprepare(dwmac->clk); - return ret; - } - /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */ - value |= BIT(DWMAC_CKEN_GTX) | - /* Use simple mux for 25/125 Mhz clock switching */ - BIT(DWMAC_SIMPLE_MUX) | - /* set auto switch tx clock source */ - BIT(DWMAC_AUTO_TX_SOURCE) | - /* enable tx & rx vardelay */ - BIT(DWMAC_CKEN_TX_OUT) | - BIT(DWMAC_CKEN_TXN_OUT) | - BIT(DWMAC_CKEN_TX_IN) | - BIT(DWMAC_CKEN_RX_OUT) | - BIT(DWMAC_CKEN_RXN_OUT) | - BIT(DWMAC_CKEN_RX_IN); - regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value); - - /* set tx & rx vardelay */ - value = DWMAC_TX_VARDELAY(4) | - DWMAC_TXN_VARDELAY(2) | - DWMAC_RX_VARDELAY(10) | - DWMAC_RXN_VARDELAY(8); - regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value); - - return 0; + return ret; } static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv) @@ -128,6 +164,12 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->data = (const struct oxnas_dwmac_data *)of_device_get_match_data(&pdev->dev); + if (!dwmac->data) { + ret = -EINVAL; + goto err_remove_config_dt; + } + dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; plat_dat->init = oxnas_dwmac_init; @@ -166,8 +208,23 @@ err_remove_config_dt: return ret; } +static const struct oxnas_dwmac_data ox810se_dwmac_data = { + .setup = oxnas_dwmac_setup_ox810se, +}; + +static const struct oxnas_dwmac_data ox820_dwmac_data = { + .setup = oxnas_dwmac_setup_ox820, +}; + static const struct of_device_id oxnas_dwmac_match[] = { - { .compatible = "oxsemi,ox820-dwmac" }, + { + .compatible = "oxsemi,ox810se-dwmac", + .data = &ox810se_dwmac_data, + }, + { + .compatible = "oxsemi,ox820-dwmac", + .data = &ox820_dwmac_data, + }, { } }; MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 63ff2dad8c85..6708ca2aa4f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7159,7 +7159,8 @@ int stmmac_dvr_probe(struct device *device, pm_runtime_get_noresume(device); pm_runtime_set_active(device); - pm_runtime_enable(device); + if (!pm_runtime_enabled(device)) + pm_runtime_enable(device); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 33142d505fc8..03575c017500 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status) struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev); int pkt_size = cpsw->rx_packet_max; int ret = 0, port, ch = xmeta->ch; - int headroom = CPSW_HEADROOM; + int headroom = CPSW_HEADROOM_NA; struct net_device *ndev = xmeta->ndev; struct cpsw_priv *priv; struct page_pool *pool; @@ -392,7 +392,7 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { - int headroom = CPSW_HEADROOM, size = len; + int size = len; xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); if (status & CPDMA_RX_VLAN_ENCAP) { @@ -442,7 +442,7 @@ requeue: xmeta->ndev = ndev; xmeta->ch = ch; - dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM; + dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA; ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, pkt_size, 0); if (ret < 0) { diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 279e261e4720..bd4b1528cf99 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -283,7 +283,7 @@ static void cpsw_rx_handler(void *token, int len, int status) { struct page *new_page, *page = token; void *pa = page_address(page); - int headroom = CPSW_HEADROOM; + int headroom = CPSW_HEADROOM_NA; struct cpsw_meta_xdp *xmeta; struct cpsw_common *cpsw; struct net_device *ndev; @@ -336,7 +336,7 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { - int headroom = CPSW_HEADROOM, size = len; + int size = len; xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); if (status & CPDMA_RX_VLAN_ENCAP) { @@ -386,7 +386,7 @@ requeue: xmeta->ndev = ndev; xmeta->ch = ch; - dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM; + dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA; ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, pkt_size, 0); if (ret < 0) { diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 3537502e5e8b..ba220593e6db 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1122,7 +1122,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv) xmeta->ndev = priv->ndev; xmeta->ch = ch; - dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM; + dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM_NA; ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch, page, dma, cpsw->rx_packet_max, diff --git a/drivers/net/ethernet/vertexcom/Kconfig b/drivers/net/ethernet/vertexcom/Kconfig index 6e2cf062ddba..4184a635fe01 100644 --- a/drivers/net/ethernet/vertexcom/Kconfig +++ b/drivers/net/ethernet/vertexcom/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_VERTEXCOM bool "Vertexcom devices" - default n + default y help If you have a network (Ethernet) card belonging to this class, say Y. diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 23ac353b35fe..377c94ec2486 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -41,8 +41,9 @@ #include "xilinx_axienet.h" /* Descriptors defines for Tx and Rx DMA */ -#define TX_BD_NUM_DEFAULT 64 +#define TX_BD_NUM_DEFAULT 128 #define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 @@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) static int __axienet_device_reset(struct axienet_local *lp) { - u32 timeout; + u32 value; + int ret; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending @@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp) * they both reset the entire DMA core, so only one needs to be used. */ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); - timeout = DELAY_OF_ONE_MILLISEC; - while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & - XAXIDMA_CR_RESET_MASK) { - udelay(1); - if (--timeout == 0) { - netdev_err(lp->ndev, "%s: DMA reset timeout!\n", - __func__); - return -ETIMEDOUT; - } + ret = read_poll_timeout(axienet_dma_in32, value, + !(value & XAXIDMA_CR_RESET_MASK), + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAXIDMA_TX_CR_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__); + return ret; + } + + /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */ + ret = read_poll_timeout(axienet_ior, value, + value & XAE_INT_PHYRSTCMPLT_MASK, + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAE_IS_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__); + return ret; } return 0; @@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK)) break; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), @@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) dev_consume_skb_irq(cur_p->skb); - cur_p->cntrl = 0; cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app4 = 0; - cur_p->status = 0; cur_p->skb = NULL; + /* ensure our transmit path and device don't prematurely see status cleared */ + wmb(); + cur_p->cntrl = 0; + cur_p->status = 0; if (sizep) *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; @@ -646,6 +660,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, return i; } +/** + * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy + * @lp: Pointer to the axienet_local structure + * @num_frag: The number of BDs to check for + * + * Return: 0, on success + * NETDEV_TX_BUSY, if any of the descriptors are not free + * + * This function is invoked before BDs are allocated and transmission starts. + * This function returns 0 if a BD or group of BDs can be allocated for + * transmission. If the BD or any of the BDs are not free the function + * returns a busy status. This is invoked from axienet_start_xmit. + */ +static inline int axienet_check_tx_bd_space(struct axienet_local *lp, + int num_frag) +{ + struct axidma_bd *cur_p; + + /* Ensure we see all descriptor updates from device or TX IRQ path */ + rmb(); + cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; + if (cur_p->cntrl) + return NETDEV_TX_BUSY; + return 0; +} + /** * axienet_start_xmit_done - Invoked once a transmit is completed by the * Axi DMA Tx channel. @@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev) /* Matches barrier in axienet_start_xmit */ smp_mb(); - netif_wake_queue(ndev); -} - -/** - * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy - * @lp: Pointer to the axienet_local structure - * @num_frag: The number of BDs to check for - * - * Return: 0, on success - * NETDEV_TX_BUSY, if any of the descriptors are not free - * - * This function is invoked before BDs are allocated and transmission starts. - * This function returns 0 if a BD or group of BDs can be allocated for - * transmission. If the BD or any of the BDs are not free the function - * returns a busy status. This is invoked from axienet_start_xmit. - */ -static inline int axienet_check_tx_bd_space(struct axienet_local *lp, - int num_frag) -{ - struct axidma_bd *cur_p; - cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; - if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) - return NETDEV_TX_BUSY; - return 0; + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); } /** @@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) num_frag = skb_shinfo(skb)->nr_frags; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; - if (axienet_check_tx_bd_space(lp, num_frag)) { - if (netif_queue_stopped(ndev)) - return NETDEV_TX_BUSY; - + if (axienet_check_tx_bd_space(lp, num_frag + 1)) { + /* Should not happen as last start_xmit call should have + * checked for sufficient space and queue should only be + * woken when sufficient space is available. + */ netif_stop_queue(ndev); - - /* Matches barrier in axienet_start_xmit_done */ - smp_mb(); - - /* Space might have just been freed - check again */ - if (axienet_check_tx_bd_space(lp, num_frag)) - return NETDEV_TX_BUSY; - - netif_wake_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; } if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (++lp->tx_bd_tail >= lp->tx_bd_num) lp->tx_bd_tail = 0; + /* Stop queue if next transmit may not have space */ + if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { + netif_stop_queue(ndev); + + /* Matches barrier in axienet_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); + } + return NETDEV_TX_OK; } @@ -834,6 +859,8 @@ static void axienet_recv(struct net_device *ndev) tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, DMA_FROM_DEVICE); @@ -1352,7 +1379,8 @@ axienet_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending < TX_BD_NUM_MIN || + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) @@ -2027,6 +2055,11 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (lp->phy_node) { ret = axienet_mdio_setup(lp); diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 49d9a077d037..68291a3efd04 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1080,27 +1080,38 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one) { struct gsi *gsi; u32 backlog; + int delta; - if (!endpoint->replenish_enabled) { + if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { if (add_one) atomic_inc(&endpoint->replenish_saved); return; } + /* If already active, just update the backlog */ + if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { + if (add_one) + atomic_inc(&endpoint->replenish_backlog); + return; + } + while (atomic_dec_not_zero(&endpoint->replenish_backlog)) if (ipa_endpoint_replenish_one(endpoint)) goto try_again_later; + + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); + if (add_one) atomic_inc(&endpoint->replenish_backlog); return; try_again_later: - /* The last one didn't succeed, so fix the backlog */ - backlog = atomic_inc_return(&endpoint->replenish_backlog); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - if (add_one) - atomic_inc(&endpoint->replenish_backlog); + /* The last one didn't succeed, so fix the backlog */ + delta = add_one ? 2 : 1; + backlog = atomic_add_return(delta, &endpoint->replenish_backlog); /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even @@ -1120,7 +1131,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) u32 max_backlog; u32 saved; - endpoint->replenish_enabled = true; + set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) atomic_add(saved, &endpoint->replenish_backlog); @@ -1134,7 +1145,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) { u32 backlog; - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) atomic_add(backlog, &endpoint->replenish_saved); } @@ -1691,7 +1702,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) /* RX transactions require a single TRE, so the maximum * backlog is the same as the maximum outstanding TREs. */ - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); atomic_set(&endpoint->replenish_saved, gsi_channel_tre_max(gsi, endpoint->channel_id)); atomic_set(&endpoint->replenish_backlog, 0); diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 0a859d10312d..0313cdc607de 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -40,6 +40,19 @@ enum ipa_endpoint_name { #define IPA_ENDPOINT_MAX 32 /* Max supported by driver */ +/** + * enum ipa_replenish_flag: RX buffer replenish flags + * + * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled + * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway + * @IPA_REPLENISH_COUNT: Number of defined replenish flags + */ +enum ipa_replenish_flag { + IPA_REPLENISH_ENABLED, + IPA_REPLENISH_ACTIVE, + IPA_REPLENISH_COUNT, /* Number of flags (must be last) */ +}; + /** * struct ipa_endpoint - IPA endpoint information * @ipa: IPA pointer @@ -51,7 +64,7 @@ enum ipa_endpoint_name { * @trans_tre_max: Maximum number of TRE descriptors per transaction * @evt_ring_id: GSI event ring used by the endpoint * @netdev: Network device pointer, if endpoint uses one - * @replenish_enabled: Whether receive buffer replenishing is enabled + * @replenish_flags: Replenishing state flags * @replenish_ready: Number of replenish transactions without doorbell * @replenish_saved: Replenish requests held while disabled * @replenish_backlog: Number of buffers needed to fill hardware queue @@ -72,7 +85,7 @@ struct ipa_endpoint { struct net_device *netdev; /* Receive buffer replenishing for RX endpoints */ - bool replenish_enabled; + DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); u32 replenish_ready; atomic_t replenish_saved; atomic_t replenish_backlog; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index dae95d9a07e8..5b6c0d120e09 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -421,7 +421,7 @@ static int at803x_set_wol(struct phy_device *phydev, const u8 *mac; int ret, irq_enabled; unsigned int i; - const unsigned int offsets[] = { + static const unsigned int offsets[] = { AT803X_LOC_MAC_ADDR_32_47_OFFSET, AT803X_LOC_MAC_ADDR_16_31_OFFSET, AT803X_LOC_MAC_ADDR_0_15_OFFSET, diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 739859c0dfb1..fa71fb7a66b5 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -189,6 +189,8 @@ #define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4 #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ +#define MII_88E1510_MSCR_2 0x15 + #define MII_VCT5_TX_RX_MDI0_COUPLING 0x10 #define MII_VCT5_TX_RX_MDI1_COUPLING 0x11 #define MII_VCT5_TX_RX_MDI2_COUPLING 0x12 @@ -1932,6 +1934,58 @@ static void marvell_get_stats(struct phy_device *phydev, data[i] = marvell_get_stat(phydev, i); } +static int m88e1510_loopback(struct phy_device *phydev, bool enable) +{ + int err; + + if (enable) { + u16 bmcr_ctl = 0, mscr2_ctl = 0; + + if (phydev->speed == SPEED_1000) + bmcr_ctl = BMCR_SPEED1000; + else if (phydev->speed == SPEED_100) + bmcr_ctl = BMCR_SPEED100; + + if (phydev->duplex == DUPLEX_FULL) + bmcr_ctl |= BMCR_FULLDPLX; + + err = phy_write(phydev, MII_BMCR, bmcr_ctl); + if (err < 0) + return err; + + if (phydev->speed == SPEED_1000) + mscr2_ctl = BMCR_SPEED1000; + else if (phydev->speed == SPEED_100) + mscr2_ctl = BMCR_SPEED100; + + err = phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE, + MII_88E1510_MSCR_2, BMCR_SPEED1000 | + BMCR_SPEED100, mscr2_ctl); + if (err < 0) + return err; + + /* Need soft reset to have speed configuration takes effect */ + err = genphy_soft_reset(phydev); + if (err < 0) + return err; + + /* FIXME: Based on trial and error test, it seem 1G need to have + * delay between soft reset and loopback enablement. + */ + if (phydev->speed == SPEED_1000) + msleep(1000); + + return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, + BMCR_LOOPBACK); + } else { + err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0); + if (err < 0) + return err; + + return phy_config_aneg(phydev); + } +} + static int marvell_vct5_wait_complete(struct phy_device *phydev) { int i; @@ -3078,7 +3132,7 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, - .set_loopback = genphy_loopback, + .set_loopback = m88e1510_loopback, .get_tunable = m88e1011_get_tunable, .set_tunable = m88e1011_set_tunable, .cable_test_start = marvell_vct7_cable_test_start, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4570cb9535b7..a7ebcdab415b 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1726,8 +1726,8 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8021, .phy_id_mask = 0x00ffffff, @@ -1741,8 +1741,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8031, .phy_id_mask = 0x00ffffff, @@ -1756,8 +1756,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8041, .phy_id_mask = MICREL_PHY_ID_MASK, @@ -1788,8 +1788,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .name = "Micrel KSZ8051", /* PHY_BASIC_FEATURES */ @@ -1802,8 +1802,8 @@ static struct phy_driver ksphy_driver[] = { .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, .match_phy_device = ksz8051_match_phy_device, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8001, .name = "Micrel KSZ8001 or KS8721", @@ -1817,8 +1817,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8081, .name = "Micrel KSZ8081 or KSZ8091", @@ -1848,8 +1848,8 @@ static struct phy_driver ksphy_driver[] = { .config_init = ksz8061_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, @@ -1864,8 +1864,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = kszphy_suspend, + .resume = kszphy_resume, .read_mmd = genphy_read_mmd_unsupported, .write_mmd = genphy_write_mmd_unsupported, }, { @@ -1883,7 +1883,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, + .suspend = kszphy_suspend, .resume = kszphy_resume, }, { .phy_id = PHY_ID_LAN8814, @@ -1928,7 +1928,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, + .suspend = kszphy_suspend, .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8873MLL, diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index ab77a9f439ef..4720b24ca51b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1641,17 +1641,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp) static int sfp_module_parse_power(struct sfp *sfp) { u32 power_mW = 1000; + bool supports_a2; if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL)) power_mW = 1500; if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) power_mW = 2000; + supports_a2 = sfp->id.ext.sff8472_compliance != + SFP_SFF8472_COMPLIANCE_NONE || + sfp->id.ext.diagmon & SFP_DIAGMON_DDM; + if (power_mW > sfp->max_power_mW) { /* Module power specification exceeds the allowed maximum. */ - if (sfp->id.ext.sff8472_compliance == - SFP_SFF8472_COMPLIANCE_NONE && - !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) { + if (!supports_a2) { /* The module appears not to implement bus address * 0xa2, so assume that the module powers up in the * indicated mode. @@ -1668,11 +1671,25 @@ static int sfp_module_parse_power(struct sfp *sfp) } } + if (power_mW <= 1000) { + /* Modules below 1W do not require a power change sequence */ + sfp->module_power_mW = power_mW; + return 0; + } + + if (!supports_a2) { + /* The module power level is below the host maximum and the + * module appears not to implement bus address 0xa2, so assume + * that the module powers up in the indicated mode. + */ + return 0; + } + /* If the module requires a higher power mode, but also requires * an address change sequence, warn the user that the module may * not be functional. */ - if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) { + if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) { dev_warn(sfp->dev, "Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n", power_mW / 1000, (power_mW / 100) % 10); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f510e8219470..37e5f3495362 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1316,6 +1316,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */ + {QMI_FIXED_INTF(0x19d2, 0x1485, 5)}, /* ZTE MF286D */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ @@ -1401,6 +1402,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ + {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index abe0149ed917..bc1e3dd67c04 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1962,7 +1962,8 @@ static const struct driver_info smsc95xx_info = { .bind = smsc95xx_bind, .unbind = smsc95xx_unbind, .link_reset = smsc95xx_link_reset, - .reset = smsc95xx_start_phy, + .reset = smsc95xx_reset, + .check_connect = smsc95xx_start_phy, .stop = smsc95xx_stop, .rx_fixup = smsc95xx_rx_fixup, .tx_fixup = smsc95xx_tx_fixup, diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 71bf9b4f769f..6872782e8dd8 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work) int err; while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { - struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL); + struct sk_buff *skb = alloc_skb(mbim->mru, GFP_KERNEL); if (unlikely(!skb)) break; err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, - MHI_DEFAULT_MRU, MHI_EOT); + mbim->mru, MHI_EOT); if (unlikely(err)) { kfree_skb(skb); break; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 37d26f01986b..62a0f1a010cb 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -188,7 +188,7 @@ do { \ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) { int polarity, retry, ret; - char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 }; + static const char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 }; int count = sizeof(rset_cmd); nfc_info(&phy->i2c_dev->dev, "Detecting nfc_en polarity\n"); diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index a43fc4117fa5..c922f10d0d7b 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -316,6 +316,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -ENOMEM; transaction->aid_len = skb->data[1]; + + /* Checking if the length of the AID is valid */ + if (transaction->aid_len > sizeof(transaction->aid)) + return -EINVAL; + memcpy(transaction->aid, &skb->data[2], transaction->aid_len); @@ -325,6 +330,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -EPROTO; transaction->params_len = skb->data[transaction->aid_len + 3]; + + /* Total size is allocated (skb->len - 2) minus fixed array members */ + if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction))) + return -EINVAL; + memcpy(transaction->params, skb->data + transaction->aid_len + 4, transaction->params_len); diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 93772ab8d7e3..c7552df32082 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -548,6 +548,73 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, } } +static int pwm_apply_legacy(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int err; + struct pwm_state initial_state = pwm->state; + + if (state->polarity != pwm->state.polarity) { + if (!chip->ops->set_polarity) + return -EINVAL; + + /* + * Changing the polarity of a running PWM is only allowed when + * the PWM driver implements ->apply(). + */ + if (pwm->state.enabled) { + chip->ops->disable(chip, pwm); + + /* + * Update pwm->state already here in case + * .set_polarity() or another callback depend on that. + */ + pwm->state.enabled = false; + } + + err = chip->ops->set_polarity(chip, pwm, state->polarity); + if (err) + goto rollback; + + pwm->state.polarity = state->polarity; + } + + if (!state->enabled) { + if (pwm->state.enabled) + chip->ops->disable(chip, pwm); + + return 0; + } + + /* + * We cannot skip calling ->config even if state->period == + * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle + * because we might have exited early in the last call to + * pwm_apply_state because of !state->enabled and so the two values in + * pwm->state might not be configured in hardware. + */ + err = chip->ops->config(pwm->chip, pwm, + state->duty_cycle, + state->period); + if (err) + goto rollback; + + pwm->state.period = state->period; + pwm->state.duty_cycle = state->duty_cycle; + + if (!pwm->state.enabled) { + err = chip->ops->enable(chip, pwm); + if (err) + goto rollback; + } + + return 0; + +rollback: + pwm->state = initial_state; + return err; +} + /** * pwm_apply_state() - atomically apply a new state to a PWM device * @pwm: PWM device @@ -580,70 +647,22 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) state->usage_power == pwm->state.usage_power) return 0; - if (chip->ops->apply) { + if (chip->ops->apply) err = chip->ops->apply(chip, pwm, state); - if (err) - return err; + else + err = pwm_apply_legacy(chip, pwm, state); + if (err) + return err; - trace_pwm_apply(pwm, state); + trace_pwm_apply(pwm, state); - pwm->state = *state; + pwm->state = *state; - /* - * only do this after pwm->state was applied as some - * implementations of .get_state depend on this - */ - pwm_apply_state_debug(pwm, state); - } else { - /* - * FIXME: restore the initial state in case of error. - */ - if (state->polarity != pwm->state.polarity) { - if (!chip->ops->set_polarity) - return -EINVAL; - - /* - * Changing the polarity of a running PWM is - * only allowed when the PWM driver implements - * ->apply(). - */ - if (pwm->state.enabled) { - chip->ops->disable(chip, pwm); - pwm->state.enabled = false; - } - - err = chip->ops->set_polarity(chip, pwm, - state->polarity); - if (err) - return err; - - pwm->state.polarity = state->polarity; - } - - if (state->period != pwm->state.period || - state->duty_cycle != pwm->state.duty_cycle) { - err = chip->ops->config(pwm->chip, pwm, - state->duty_cycle, - state->period); - if (err) - return err; - - pwm->state.duty_cycle = state->duty_cycle; - pwm->state.period = state->period; - } - - if (state->enabled != pwm->state.enabled) { - if (state->enabled) { - err = chip->ops->enable(chip, pwm); - if (err) - return err; - } else { - chip->ops->disable(chip, pwm); - } - - pwm->state.enabled = state->enabled; - } - } + /* + * only do this after pwm->state was applied as some + * implementations of .get_state depend on this + */ + pwm_apply_state_debug(pwm, state); return 0; } diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index f97f82548293..5996049f66ec 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -128,11 +128,9 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, duty = DIV_ROUND_UP(timebase * duty_ns, period_ns); - ret = pm_runtime_get_sync(chip->dev); - if (ret < 0) { - pm_runtime_put_autosuspend(chip->dev); + ret = pm_runtime_resume_and_get(chip->dev); + if (ret < 0) return ret; - } val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG); val &= ~(PWM_CTRL_CFG_DIV_MASK << PWM_CTRL_CFG_DIV_SHIFT(pwm->hwpwm)); @@ -184,10 +182,33 @@ static void img_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) pm_runtime_put_autosuspend(chip->dev); } +static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int err; + + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + + if (!state->enabled) { + if (pwm->state.enabled) + img_pwm_disable(chip, pwm); + + return 0; + } + + err = img_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period); + if (err) + return err; + + if (!pwm->state.enabled) + err = img_pwm_enable(chip, pwm); + + return err; +} + static const struct pwm_ops img_pwm_ops = { - .config = img_pwm_config, - .enable = img_pwm_enable, - .disable = img_pwm_disable, + .apply = img_pwm_apply, .owner = THIS_MODULE, }; diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 203194f2c92e..86567add79db 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -58,9 +58,9 @@ static inline struct twl_pwm_chip *to_twl(struct pwm_chip *chip) } static int twl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) + u64 duty_ns, u64 period_ns) { - int duty_cycle = DIV_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1; + int duty_cycle = DIV64_U64_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1; u8 pwm_config[2] = { 1, 0 }; int base, ret; @@ -279,19 +279,65 @@ out: mutex_unlock(&twl->mutex); } +static int twl4030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int err; + + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + + if (!state->enabled) { + if (pwm->state.enabled) + twl4030_pwm_disable(chip, pwm); + + return 0; + } + + err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period); + if (err) + return err; + + if (!pwm->state.enabled) + err = twl4030_pwm_enable(chip, pwm); + + return err; +} + +static int twl6030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int err; + + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + + if (!state->enabled) { + if (pwm->state.enabled) + twl6030_pwm_disable(chip, pwm); + + return 0; + } + + err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period); + if (err) + return err; + + if (!pwm->state.enabled) + err = twl6030_pwm_enable(chip, pwm); + + return err; +} + static const struct pwm_ops twl4030_pwm_ops = { - .config = twl_pwm_config, - .enable = twl4030_pwm_enable, - .disable = twl4030_pwm_disable, + .apply = twl4030_pwm_apply, .request = twl4030_pwm_request, .free = twl4030_pwm_free, .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwm_ops = { - .config = twl_pwm_config, - .enable = twl6030_pwm_enable, - .disable = twl6030_pwm_disable, + .apply = twl6030_pwm_apply, .owner = THIS_MODULE, }; diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index 480bfc29782f..7170a315535b 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -70,7 +70,7 @@ static inline void vt8500_pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 b } static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) + u64 duty_ns, u64 period_ns) { struct vt8500_chip *vt8500 = to_vt8500_chip(chip); unsigned long long c; @@ -102,8 +102,8 @@ static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } c = (unsigned long long)pv * duty_ns; - do_div(c, period_ns); - dc = c; + + dc = div64_u64(c, period_ns); writel(prescale, vt8500->base + REG_SCALAR(pwm->hwpwm)); vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE); @@ -176,11 +176,54 @@ static int vt8500_pwm_set_polarity(struct pwm_chip *chip, return 0; } +static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int err; + bool enabled = pwm->state.enabled; + + if (state->polarity != pwm->state.polarity) { + /* + * Changing the polarity of a running PWM is only allowed when + * the PWM driver implements ->apply(). + */ + if (enabled) { + vt8500_pwm_disable(chip, pwm); + + enabled = false; + } + + err = vt8500_pwm_set_polarity(chip, pwm, state->polarity); + if (err) + return err; + } + + if (!state->enabled) { + if (enabled) + vt8500_pwm_disable(chip, pwm); + + return 0; + } + + /* + * We cannot skip calling ->config even if state->period == + * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle + * because we might have exited early in the last call to + * pwm_apply_state because of !state->enabled and so the two values in + * pwm->state might not be configured in hardware. + */ + err = vt8500_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period); + if (err) + return err; + + if (!enabled) + err = vt8500_pwm_enable(chip, pwm); + + return err; +} + static const struct pwm_ops vt8500_pwm_ops = { - .enable = vt8500_pwm_enable, - .disable = vt8500_pwm_disable, - .config = vt8500_pwm_config, - .set_polarity = vt8500_pwm_set_polarity, + .apply = vt8500_pwm_apply, .owner = THIS_MODULE, }; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 362f91ec8845..352c725ccf18 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -309,13 +309,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev, if ((pos & 3) && size > 2) { u16 val; + __le16 lval; ret = pci_user_read_config_word(pdev, pos, &val); if (ret) return ret; - val = cpu_to_le16(val); - if (copy_to_user(buf + count - size, &val, 2)) + lval = cpu_to_le16(val); + if (copy_to_user(buf + count - size, &lval, 2)) return -EFAULT; pos += 2; @@ -324,13 +325,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev, while (size > 3) { u32 val; + __le32 lval; ret = pci_user_read_config_dword(pdev, pos, &val); if (ret) return ret; - val = cpu_to_le32(val); - if (copy_to_user(buf + count - size, &val, 4)) + lval = cpu_to_le32(val); + if (copy_to_user(buf + count - size, &lval, 4)) return -EFAULT; pos += 4; @@ -339,13 +341,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev, while (size >= 2) { u16 val; + __le16 lval; ret = pci_user_read_config_word(pdev, pos, &val); if (ret) return ret; - val = cpu_to_le16(val); - if (copy_to_user(buf + count - size, &val, 2)) + lval = cpu_to_le16(val); + if (copy_to_user(buf + count - size, &lval, 2)) return -EFAULT; pos += 2; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f17490ab238f..9394aa9444c1 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -256,7 +256,7 @@ static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize) static void vfio_dma_bitmap_free(struct vfio_dma *dma) { - kfree(dma->bitmap); + kvfree(dma->bitmap); dma->bitmap = NULL; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7d2c33cdbac6..7d305b974824 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode, if ((newcaps & CEPH_CAP_LINK_SHARED) && (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); - if (inode->i_nlink == 0 && - (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) + if (inode->i_nlink == 0) deleted_inode = true; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9d9304e712d9..5b9104b8e453 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, int fmode, bool isdir) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mount_options *opt = + ceph_inode_to_client(&ci->vfs_inode)->mount_options; struct ceph_file_info *fi; dout("%s %p %p 0%o (%s)\n", __func__, inode, file, @@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, if (!fi) return -ENOMEM; + if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) + fi->flags |= CEPH_F_SYNC; + file->private_data = fi; } @@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct ceph_inode_info *ci = ceph_inode(inode); bool direct_lock = iocb->ki_flags & IOCB_DIRECT; ssize_t ret; - int want, got = 0; + int want = 0, got = 0; int retry_op = 0, read = 0; again: @@ -1556,13 +1561,14 @@ again: else ceph_start_io_read(inode); + if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) + want |= CEPH_CAP_FILE_CACHE; if (fi->fmode & CEPH_FILE_MODE_LAZY) - want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; - else - want = CEPH_CAP_FILE_CACHE; + want |= CEPH_CAP_FILE_LAZYIO; + ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got); if (ret < 0) { - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_end_io_direct(inode); else ceph_end_io_read(inode); @@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; - int err, want, got; + int err, want = 0, got; bool direct_lock = false; u32 map_flags; u64 pool_flags; @@ -1771,10 +1777,10 @@ retry_snap: dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, count, i_size_read(inode)); + if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) + want |= CEPH_CAP_FILE_BUFFER; if (fi->fmode & CEPH_FILE_MODE_LAZY) - want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; - else - want = CEPH_CAP_FILE_BUFFER; + want |= CEPH_CAP_FILE_LAZYIO; got = 0; err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got); if (err < 0) diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index c57699d8408d..0fcba68f9a99 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, msg->hdr.version = cpu_to_le16(1); msg->hdr.compat_version = cpu_to_le16(1); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("client%llu send metrics to mds%d\n", - ceph_client_gid(mdsc->fsc->client), s->s_mds); ceph_con_send(&s->s_con, msg); return true; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 620c691af40e..a338a3ec0dc4 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode) /* if root is the real CephFS root, we don't have quota realms */ if (root && ceph_ino(root) == CEPH_INO_ROOT) return false; + /* MDS stray dirs have no quota realms */ + if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino)) + return false; /* otherwise, we can't know for sure */ return true; } @@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) if (ci->i_max_bytes) { total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; + /* For quota size less than 4MB, use 4KB block size */ + if (!total) { + total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT; + used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT; + } /* It is possible for a quota to be exceeded. * Report 'zero' in that case */ free = total > used ? total - used : 0; + /* For quota size less than 4KB, report the + * total=used=4KB,free=0 when quota is full + * and total=free=4KB, used=0 otherwise */ + if (!total) { + total = 1; + free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0; + buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT; + } } spin_unlock(&ci->i_ceph_lock); if (total) { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bea89bdb534a..bf79f369aec6 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -27,6 +27,8 @@ #include #include +#include + static DEFINE_SPINLOCK(ceph_fsc_lock); static LIST_HEAD(ceph_fsc_list); @@ -146,6 +148,7 @@ enum { Opt_mds_namespace, Opt_recover_session, Opt_source, + Opt_mon_addr, /* string args above */ Opt_dirstat, Opt_rbytes, @@ -159,6 +162,7 @@ enum { Opt_quotadf, Opt_copyfrom, Opt_wsync, + Opt_pagecache, }; enum ceph_recover_session_mode { @@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = { fsparam_u32 ("rsize", Opt_rsize), fsparam_string ("snapdirname", Opt_snapdirname), fsparam_string ("source", Opt_source), + fsparam_string ("mon_addr", Opt_mon_addr), fsparam_u32 ("wsize", Opt_wsize), fsparam_flag_no ("wsync", Opt_wsync), + fsparam_flag_no ("pagecache", Opt_pagecache), {} }; @@ -228,9 +234,92 @@ static void canonicalize_path(char *path) } /* - * Parse the source parameter. Distinguish the server list from the path. + * Check if the mds namespace in ceph_mount_options matches + * the passed in namespace string. First time match (when + * ->mds_namespace is NULL) is treated specially, since + * ->mds_namespace needs to be initialized by the caller. + */ +static int namespace_equals(struct ceph_mount_options *fsopt, + const char *namespace, size_t len) +{ + return !(fsopt->mds_namespace && + (strlen(fsopt->mds_namespace) != len || + strncmp(fsopt->mds_namespace, namespace, len))); +} + +static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, + struct fs_context *fc) +{ + int r; + struct ceph_parse_opts_ctx *pctx = fc->fs_private; + struct ceph_mount_options *fsopt = pctx->opts; + + if (*dev_name_end != ':') + return invalfc(fc, "separator ':' missing in source"); + + r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, + pctx->copts, fc->log.log, ','); + if (r) + return r; + + fsopt->new_dev_syntax = false; + return 0; +} + +static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, + struct fs_context *fc) +{ + size_t len; + struct ceph_fsid fsid; + struct ceph_parse_opts_ctx *pctx = fc->fs_private; + struct ceph_mount_options *fsopt = pctx->opts; + char *fsid_start, *fs_name_start; + + if (*dev_name_end != '=') { + dout("separator '=' missing in source"); + return -EINVAL; + } + + fsid_start = strchr(dev_name, '@'); + if (!fsid_start) + return invalfc(fc, "missing cluster fsid"); + ++fsid_start; /* start of cluster fsid */ + + fs_name_start = strchr(fsid_start, '.'); + if (!fs_name_start) + return invalfc(fc, "missing file system name"); + + if (ceph_parse_fsid(fsid_start, &fsid)) + return invalfc(fc, "Invalid FSID"); + + ++fs_name_start; /* start of file system name */ + len = dev_name_end - fs_name_start; + + if (!namespace_equals(fsopt, fs_name_start, len)) + return invalfc(fc, "Mismatching mds_namespace"); + kfree(fsopt->mds_namespace); + fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); + if (!fsopt->mds_namespace) + return -ENOMEM; + dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); + + fsopt->new_dev_syntax = true; + return 0; +} + +/* + * Parse the source parameter for new device format. Distinguish the device + * spec from the path. Try parsing new device format and fallback to old + * format if needed. * - * The source will look like: + * New device syntax will looks like: + * =/ + * where + * is name@fsid.fsname + * is optional, but if present must begin with '/' + * (monitor addresses are passed via mount option) + * + * Old device syntax is: * [,...]:[] * where * is [:] @@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) dev_name_end = dev_name + strlen(dev_name); } - dev_name_end--; /* back up to ':' separator */ - if (dev_name_end < dev_name || *dev_name_end != ':') - return invalfc(fc, "No path or : separator in source"); + dev_name_end--; /* back up to separator */ + if (dev_name_end < dev_name) + return invalfc(fc, "Path missing in source"); dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); if (fsopt->server_path) dout("server path '%s'\n", fsopt->server_path); - ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, - pctx->copts, fc->log.log); - if (ret) - return ret; + dout("trying new device syntax"); + ret = ceph_parse_new_source(dev_name, dev_name_end, fc); + if (ret) { + if (ret != -EINVAL) + return ret; + dout("trying old device syntax"); + ret = ceph_parse_old_source(dev_name, dev_name_end, fc); + if (ret) + return ret; + } fc->source = param->string; param->string = NULL; return 0; } +static int ceph_parse_mon_addr(struct fs_parameter *param, + struct fs_context *fc) +{ + struct ceph_parse_opts_ctx *pctx = fc->fs_private; + struct ceph_mount_options *fsopt = pctx->opts; + + kfree(fsopt->mon_addr); + fsopt->mon_addr = param->string; + param->string = NULL; + + return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), + pctx->copts, fc->log.log, '/'); +} + static int ceph_parse_mount_param(struct fs_context *fc, struct fs_parameter *param) { @@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc, param->string = NULL; break; case Opt_mds_namespace: + if (!namespace_equals(fsopt, param->string, strlen(param->string))) + return invalfc(fc, "Mismatching mds_namespace"); kfree(fsopt->mds_namespace); fsopt->mds_namespace = param->string; param->string = NULL; @@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc, if (fc->source) return invalfc(fc, "Multiple sources specified"); return ceph_parse_source(param, fc); + case Opt_mon_addr: + return ceph_parse_mon_addr(param, fc); case Opt_wsize: if (result.uint_32 < PAGE_SIZE || result.uint_32 > CEPH_MAX_WRITE_SIZE) @@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc, else fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; break; + case Opt_pagecache: + if (result.negated) + fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; + else + fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; + break; default: BUG(); } @@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args) kfree(args->mds_namespace); kfree(args->server_path); kfree(args->fscache_uniq); + kfree(args->mon_addr); kfree(args); } @@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, if (ret) return ret; + ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); + if (ret) + return ret; + return ceph_compare_options(new_opt, fsc->client); } @@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) seq_puts(m, ",copyfrom"); - if (fsopt->mds_namespace) + /* dump mds_namespace when old device syntax is in use */ + if (fsopt->mds_namespace && !fsopt->new_dev_syntax) seq_show_option(m, "mds_namespace", fsopt->mds_namespace); + if (fsopt->mon_addr) + seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); + if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) seq_show_option(m, "recover_session", "clean"); if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) seq_puts(m, ",wsync"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) + seq_puts(m, ",nopagecache"); + if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) seq_printf(m, ",wsize=%u", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) @@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) static int ceph_get_tree(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; + struct ceph_mount_options *fsopt = pctx->opts; struct super_block *sb; struct ceph_fs_client *fsc; struct dentry *res; @@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc) if (!fc->source) return invalfc(fc, "No source"); + if (fsopt->new_dev_syntax && !fsopt->mon_addr) + return invalfc(fc, "No monitor address"); /* create client (which we may/may not use) */ fsc = create_fs_client(pctx->opts, pctx->copts); @@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc) else ceph_clear_mount_opt(fsc, ASYNC_DIROPS); + if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { + kfree(fsc->mount_options->mon_addr); + fsc->mount_options->mon_addr = fsopt->mon_addr; + fsopt->mon_addr = NULL; + pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); + } + sync_filesystem(fc->root->d_sb); return 0; } @@ -1325,6 +1466,14 @@ bool disable_send_metrics = false; module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); +/* for both v1 and v2 syntax */ +static bool mount_support = true; +static const struct kernel_param_ops param_ops_mount_syntax = { + .get = param_get_bool, +}; +module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); +module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d0142cc5c41b..67f145e1ae7a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -24,13 +24,11 @@ #include #endif -/* f_type in struct statfs */ -#define CEPH_SUPER_MAGIC 0x00c36400 - /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ #define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) +#define CEPH_4K_BLOCK_SHIFT 12 /* 4 KB */ #define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */ #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ @@ -44,6 +42,7 @@ #define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */ #define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */ +#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */ #define CEPH_MOUNT_OPT_DEFAULT \ (CEPH_MOUNT_OPT_DCACHE | \ @@ -88,6 +87,8 @@ struct ceph_mount_options { unsigned int max_readdir; /* max readdir result (entries) */ unsigned int max_readdir_bytes; /* max readdir result (bytes) */ + bool new_dev_syntax; + /* * everything above this point can be memcmp'd; everything below * is handled in compare_mount_options() @@ -97,6 +98,7 @@ struct ceph_mount_options { char *mds_namespace; /* default NULL */ char *server_path; /* default NULL (means "/") */ char *fscache_uniq; /* default NULL */ + char *mon_addr; }; struct ceph_fs_client { @@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data) * * These come from src/mds/mdstypes.h in the ceph sources. */ -#define CEPH_MAX_MDS 0x100 -#define CEPH_NUM_STRAY 10 +#define CEPH_MAX_MDS 0x100 +#define CEPH_NUM_STRAY 10 #define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS) +#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS) #define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY)) static inline bool ceph_vino_is_reserved(const struct ceph_vino vino) { - if (vino.ino < CEPH_INO_SYSTEM_BASE && - vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) { - WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino); - return true; - } - return false; + if (vino.ino >= CEPH_INO_SYSTEM_BASE || + vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET) + return false; + + /* Don't warn on mdsdirs */ + WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET, + "Attempt to access reserved inode number 0x%llx", + vino.ino); + return true; } static inline struct inode *ceph_find_inode(struct super_block *sb, diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c index b014f4638610..c03eba090368 100644 --- a/fs/ksmbd/asn1.c +++ b/fs/ksmbd/asn1.c @@ -21,101 +21,11 @@ #include "ksmbd_spnego_negtokeninit.asn1.h" #include "ksmbd_spnego_negtokentarg.asn1.h" -#define SPNEGO_OID_LEN 7 #define NTLMSSP_OID_LEN 10 -#define KRB5_OID_LEN 7 -#define KRB5U2U_OID_LEN 8 -#define MSKRB5_OID_LEN 7 -static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; -static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; -static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; -static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 }; -static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01, 0x82, 0x37, 0x02, 0x02, 0x0a }; -static bool -asn1_subid_decode(const unsigned char **begin, const unsigned char *end, - unsigned long *subid) -{ - const unsigned char *ptr = *begin; - unsigned char ch; - - *subid = 0; - - do { - if (ptr >= end) - return false; - - ch = *ptr++; - *subid <<= 7; - *subid |= ch & 0x7F; - } while ((ch & 0x80) == 0x80); - - *begin = ptr; - return true; -} - -static bool asn1_oid_decode(const unsigned char *value, size_t vlen, - unsigned long **oid, size_t *oidlen) -{ - const unsigned char *iptr = value, *end = value + vlen; - unsigned long *optr; - unsigned long subid; - - vlen += 1; - if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long)) - goto fail_nullify; - - *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL); - if (!*oid) - return false; - - optr = *oid; - - if (!asn1_subid_decode(&iptr, end, &subid)) - goto fail; - - if (subid < 40) { - optr[0] = 0; - optr[1] = subid; - } else if (subid < 80) { - optr[0] = 1; - optr[1] = subid - 40; - } else { - optr[0] = 2; - optr[1] = subid - 80; - } - - *oidlen = 2; - optr += 2; - - while (iptr < end) { - if (++(*oidlen) > vlen) - goto fail; - - if (!asn1_subid_decode(&iptr, end, optr++)) - goto fail; - } - return true; - -fail: - kfree(*oid); -fail_nullify: - *oid = NULL; - return false; -} - -static bool oid_eq(unsigned long *oid1, unsigned int oid1len, - unsigned long *oid2, unsigned int oid2len) -{ - if (oid1len != oid2len) - return false; - - return memcmp(oid1, oid2, oid1len) == 0; -} - int ksmbd_decode_negTokenInit(unsigned char *security_blob, int length, struct ksmbd_conn *conn) @@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { - unsigned long *oid; - size_t oidlen; - int err = 0; + enum OID oid; - if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) { - err = -EBADMSG; - goto out; - } - - if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN)) - err = -EBADMSG; - kfree(oid); -out: - if (err) { + oid = look_up_OID(value, vlen); + if (oid != OID_spnego) { char buf[50]; sprint_oid(value, vlen, buf, sizeof(buf)); ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf); + return -EBADMSG; } - return err; + + return 0; } int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen, @@ -279,37 +181,31 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen, size_t vlen) { struct ksmbd_conn *conn = context; - unsigned long *oid; - size_t oidlen; + enum OID oid; int mech_type; - char buf[50]; - if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) - goto fail; - - if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) + oid = look_up_OID(value, vlen); + if (oid == OID_ntlmssp) { mech_type = KSMBD_AUTH_NTLMSSP; - else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) + } else if (oid == OID_mskrb5) { mech_type = KSMBD_AUTH_MSKRB5; - else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) + } else if (oid == OID_krb5) { mech_type = KSMBD_AUTH_KRB5; - else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN)) + } else if (oid == OID_krb5u2u) { mech_type = KSMBD_AUTH_KRB5U2U; - else - goto fail; + } else { + char buf[50]; + + sprint_oid(value, vlen, buf, sizeof(buf)); + ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf); + return -EBADMSG; + } conn->auth_mechs |= mech_type; if (conn->preferred_auth_mech == 0) conn->preferred_auth_mech = mech_type; - kfree(oid); return 0; - -fail: - kfree(oid); - sprint_oid(value, vlen, buf, sizeof(buf)); - ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf); - return -EBADMSG; } int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen, diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 3503b1c48cb4..dc3d061edda9 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -215,7 +215,7 @@ out: * Return: 0 on success, error number on error */ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2, - int blen, char *domain_name) + int blen, char *domain_name, char *cryptkey) { char ntlmv2_hash[CIFS_ENCPWD_SIZE]; char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE]; @@ -256,7 +256,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2, goto out; } - memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE); + memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE); memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen); rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len); @@ -295,7 +295,8 @@ out: * Return: 0 on success, error number on error */ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, - int blob_len, struct ksmbd_session *sess) + int blob_len, struct ksmbd_conn *conn, + struct ksmbd_session *sess) { char *domain_name; unsigned int nt_off, dn_off; @@ -324,7 +325,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, /* TODO : use domain name that imported from configuration file */ domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off, - dn_len, true, sess->conn->local_nls); + dn_len, true, conn->local_nls); if (IS_ERR(domain_name)) return PTR_ERR(domain_name); @@ -333,7 +334,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, domain_name); ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off), nt_len - CIFS_ENCPWD_SIZE, - domain_name); + domain_name, conn->ntlmssp.cryptkey); kfree(domain_name); return ret; } @@ -347,7 +348,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, * */ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob, - int blob_len, struct ksmbd_session *sess) + int blob_len, struct ksmbd_conn *conn) { if (blob_len < sizeof(struct negotiate_message)) { ksmbd_debug(AUTH, "negotiate blob len %d too small\n", @@ -361,7 +362,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob, return -EINVAL; } - sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags); + conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags); return 0; } @@ -375,14 +376,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob, */ unsigned int ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, - struct ksmbd_session *sess) + struct ksmbd_conn *conn) { struct target_info *tinfo; wchar_t *name; __u8 *target_name; unsigned int flags, blob_off, blob_len, type, target_info_len = 0; int len, uni_len, conv_len; - int cflags = sess->ntlmssp.client_flags; + int cflags = conn->ntlmssp.client_flags; memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8); chgblob->MessageType = NtLmChallenge; @@ -403,7 +404,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, if (cflags & NTLMSSP_REQUEST_TARGET) flags |= NTLMSSP_REQUEST_TARGET; - if (sess->conn->use_spnego && + if (conn->use_spnego && (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC; @@ -414,7 +415,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, return -ENOMEM; conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len, - sess->conn->local_nls); + conn->local_nls); if (conv_len < 0 || conv_len > len) { kfree(name); return -EINVAL; @@ -430,8 +431,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off); /* Initialize random conn challenge */ - get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64)); - memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey, + get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64)); + memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE); /* Add Target Information to security buffer */ diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h index 9c2d4badd05d..95629651cf26 100644 --- a/fs/ksmbd/auth.h +++ b/fs/ksmbd/auth.h @@ -38,16 +38,16 @@ struct kvec; int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov, unsigned int nvec, int enc); void ksmbd_copy_gss_neg_header(void *buf); -int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf); int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2, - int blen, char *domain_name); + int blen, char *domain_name, char *cryptkey); int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, - int blob_len, struct ksmbd_session *sess); + int blob_len, struct ksmbd_conn *conn, + struct ksmbd_session *sess); int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob, - int blob_len, struct ksmbd_session *sess); + int blob_len, struct ksmbd_conn *conn); unsigned int ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, - struct ksmbd_session *sess); + struct ksmbd_conn *conn); int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, int in_len, char *out_blob, int *out_len); int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov, diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 83a94d0bb480..208d2cff7bd3 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); conn->total_credits = 1; + conn->outstanding_credits = 1; init_waitqueue_head(&conn->req_running_q); INIT_LIST_HEAD(&conn->conns_list); @@ -386,17 +387,24 @@ out: static void stop_sessions(void) { struct ksmbd_conn *conn; + struct ksmbd_transport *t; again: read_lock(&conn_list_lock); list_for_each_entry(conn, &conn_list, conns_list) { struct task_struct *task; - task = conn->transport->handler; + t = conn->transport; + task = t->handler; if (task) ksmbd_debug(CONN, "Stop session handler %s/%d\n", task->comm, task_pid_nr(task)); conn->status = KSMBD_SESS_EXITING; + if (t->ops->shutdown) { + read_unlock(&conn_list_lock); + t->ops->shutdown(t); + read_lock(&conn_list_lock); + } } read_unlock(&conn_list_lock); diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index e5403c587a58..7a59aacb5daa 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -61,8 +61,8 @@ struct ksmbd_conn { atomic_t req_running; /* References which are made for this Server object*/ atomic_t r_count; - unsigned short total_credits; - unsigned short max_credits; + unsigned int total_credits; + unsigned int outstanding_credits; spinlock_t credits_lock; wait_queue_head_t req_running_q; /* Lock to protect requests list*/ @@ -72,12 +72,7 @@ struct ksmbd_conn { int connection_type; struct ksmbd_stats stats; char ClientGUID[SMB2_CLIENT_GUID_SIZE]; - union { - /* pending trans request table */ - struct trans_state *recent_trans; - /* Used by ntlmssp */ - char *ntlmssp_cryptkey; - }; + struct ntlmssp_auth ntlmssp; spinlock_t llist_lock; struct list_head lock_list; @@ -122,6 +117,7 @@ struct ksmbd_conn_ops { struct ksmbd_transport_ops { int (*prepare)(struct ksmbd_transport *t); void (*disconnect)(struct ksmbd_transport *t); + void (*shutdown)(struct ksmbd_transport *t); int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size); int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int size, bool need_invalidate_rkey, diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index c6718a05d347..71bfb7de4472 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -103,6 +103,8 @@ struct ksmbd_startup_request { * we set the SPARSE_FILES bit (0x40). */ __u32 sub_auth[3]; /* Subauth value for Security ID */ + __u32 smb2_max_credits; /* MAX credits */ + __u32 reserved[128]; /* Reserved room */ __u32 ifc_list_sz; /* interfaces list size */ __s8 ____payload[]; }; @@ -113,7 +115,7 @@ struct ksmbd_startup_request { * IPC request to shutdown ksmbd server. */ struct ksmbd_shutdown_request { - __s32 reserved; + __s32 reserved[16]; }; /* @@ -122,6 +124,7 @@ struct ksmbd_shutdown_request { struct ksmbd_login_request { __u32 handle; __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ + __u32 reserved[16]; /* Reserved room */ }; /* @@ -135,6 +138,7 @@ struct ksmbd_login_response { __u16 status; __u16 hash_sz; /* hash size */ __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */ + __u32 reserved[16]; /* Reserved room */ }; /* @@ -143,6 +147,7 @@ struct ksmbd_login_response { struct ksmbd_share_config_request { __u32 handle; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */ + __u32 reserved[16]; /* Reserved room */ }; /* @@ -157,6 +162,7 @@ struct ksmbd_share_config_response { __u16 force_directory_mode; __u16 force_uid; __u16 force_gid; + __u32 reserved[128]; /* Reserved room */ __u32 veto_list_sz; __s8 ____payload[]; }; @@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request { __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; __s8 share[KSMBD_REQ_MAX_SHARE_NAME]; __s8 peer_addr[64]; + __u32 reserved[16]; /* Reserved room */ }; /* @@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response { __u32 handle; __u16 status; __u16 connection_flags; + __u32 reserved[16]; /* Reserved room */ }; /* @@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response { struct ksmbd_tree_disconnect_request { __u64 session_id; /* session id */ __u64 connect_id; /* tree connection id */ + __u32 reserved[16]; /* Reserved room */ }; /* @@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request { struct ksmbd_logout_request { __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ __u32 account_flags; + __u32 reserved[16]; /* Reserved room */ }; /* diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c index 1019d3677d55..279d00feff21 100644 --- a/fs/ksmbd/mgmt/user_config.c +++ b/fs/ksmbd/mgmt/user_config.c @@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user) return 1; return 0; } + +bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2) +{ + if (strcmp(u1->name, u2->name)) + return false; + if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz)) + return false; + + return true; +} diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h index aff80b029579..6a44109617f1 100644 --- a/fs/ksmbd/mgmt/user_config.h +++ b/fs/ksmbd/mgmt/user_config.h @@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account); struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp); void ksmbd_free_user(struct ksmbd_user *user); int ksmbd_anonymous_user(struct ksmbd_user *user); +bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2); #endif /* __USER_CONFIG_MANAGEMENT_H__ */ diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h index 82289c3cbd2b..e241f16a3851 100644 --- a/fs/ksmbd/mgmt/user_session.h +++ b/fs/ksmbd/mgmt/user_session.h @@ -45,7 +45,6 @@ struct ksmbd_session { int state; __u8 *Preauth_HashValue; - struct ntlmssp_auth ntlmssp; char sess_key[CIFS_KEY_SIZE]; struct hlist_node hlist; diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index 50d0b1022289..4a9460153b59 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -289,7 +289,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge); void *__hdr = hdr; - int ret; + int ret = 0; switch (hdr->Command) { case SMB2_QUERY_INFO: @@ -326,21 +326,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n", credit_charge, calc_credit_num); return 1; - } else if (credit_charge > conn->max_credits) { + } else if (credit_charge > conn->vals->max_credits) { ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge); return 1; } spin_lock(&conn->credits_lock); - if (credit_charge <= conn->total_credits) { - conn->total_credits -= credit_charge; - ret = 0; - } else { + if (credit_charge > conn->total_credits) { ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", credit_charge, conn->total_credits); ret = 1; } + + if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) { + ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n", + credit_charge, conn->outstanding_credits); + ret = 1; + } else + conn->outstanding_credits += credit_charge; + spin_unlock(&conn->credits_lock); + return ret; } diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index 02a44d28bdaf..ab23da2120b9 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -19,6 +19,7 @@ static struct smb_version_values smb21_server_values = { .max_read_size = SMB21_DEFAULT_IOSIZE, .max_write_size = SMB21_DEFAULT_IOSIZE, .max_trans_size = SMB21_DEFAULT_IOSIZE, + .max_credits = SMB2_MAX_CREDITS, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, .shared_lock_type = SMB2_LOCKFLAG_SHARED, @@ -44,6 +45,7 @@ static struct smb_version_values smb30_server_values = { .max_read_size = SMB3_DEFAULT_IOSIZE, .max_write_size = SMB3_DEFAULT_IOSIZE, .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, + .max_credits = SMB2_MAX_CREDITS, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, .shared_lock_type = SMB2_LOCKFLAG_SHARED, @@ -70,6 +72,7 @@ static struct smb_version_values smb302_server_values = { .max_read_size = SMB3_DEFAULT_IOSIZE, .max_write_size = SMB3_DEFAULT_IOSIZE, .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, + .max_credits = SMB2_MAX_CREDITS, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, .shared_lock_type = SMB2_LOCKFLAG_SHARED, @@ -96,6 +99,7 @@ static struct smb_version_values smb311_server_values = { .max_read_size = SMB3_DEFAULT_IOSIZE, .max_write_size = SMB3_DEFAULT_IOSIZE, .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, + .max_credits = SMB2_MAX_CREDITS, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, .shared_lock_type = SMB2_LOCKFLAG_SHARED, @@ -197,7 +201,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn) conn->ops = &smb2_0_server_ops; conn->cmds = smb2_0_server_cmds; conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); - conn->max_credits = SMB2_MAX_CREDITS; conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) @@ -215,7 +218,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->ops = &smb3_0_server_ops; conn->cmds = smb2_0_server_cmds; conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); - conn->max_credits = SMB2_MAX_CREDITS; conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) @@ -240,7 +242,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn) conn->ops = &smb3_0_server_ops; conn->cmds = smb2_0_server_cmds; conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); - conn->max_credits = SMB2_MAX_CREDITS; conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) @@ -265,7 +266,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->ops = &smb3_11_server_ops; conn->cmds = smb2_0_server_cmds; conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); - conn->max_credits = SMB2_MAX_CREDITS; conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) @@ -304,3 +304,11 @@ void init_smb2_max_trans_size(unsigned int sz) smb302_server_values.max_trans_size = sz; smb311_server_values.max_trans_size = sz; } + +void init_smb2_max_credits(unsigned int sz) +{ + smb21_server_values.max_credits = sz; + smb30_server_values.max_credits = sz; + smb302_server_values.max_credits = sz; + smb311_server_values.max_credits = sz; +} diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index b8b3a4c28b74..1866c81c5c99 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -299,16 +299,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); struct smb2_hdr *hdr = ksmbd_resp_buf_next(work); struct ksmbd_conn *conn = work->conn; - unsigned short credits_requested; + unsigned short credits_requested, aux_max; unsigned short credit_charge, credits_granted = 0; - unsigned short aux_max, aux_credits; if (work->send_no_response) return 0; hdr->CreditCharge = req_hdr->CreditCharge; - if (conn->total_credits > conn->max_credits) { + if (conn->total_credits > conn->vals->max_credits) { hdr->CreditRequest = 0; pr_err("Total credits overflow: %d\n", conn->total_credits); return -EINVAL; @@ -316,6 +315,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) credit_charge = max_t(unsigned short, le16_to_cpu(req_hdr->CreditCharge), 1); + if (credit_charge > conn->total_credits) { + ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", + credit_charge, conn->total_credits); + return -EINVAL; + } + + conn->total_credits -= credit_charge; + conn->outstanding_credits -= credit_charge; credits_requested = max_t(unsigned short, le16_to_cpu(req_hdr->CreditRequest), 1); @@ -325,16 +332,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) * TODO: Need to adjuct CreditRequest value according to * current cpu load */ - aux_credits = credits_requested - 1; if (hdr->Command == SMB2_NEGOTIATE) - aux_max = 0; + aux_max = 1; else - aux_max = conn->max_credits - credit_charge; - aux_credits = min_t(unsigned short, aux_credits, aux_max); - credits_granted = credit_charge + aux_credits; + aux_max = conn->vals->max_credits - credit_charge; + credits_granted = min_t(unsigned short, credits_requested, aux_max); - if (conn->max_credits - conn->total_credits < credits_granted) - credits_granted = conn->max_credits - + if (conn->vals->max_credits - conn->total_credits < credits_granted) + credits_granted = conn->vals->max_credits - conn->total_credits; conn->total_credits += credits_granted; @@ -610,16 +615,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id) /** * smb2_get_name() - get filename string from on the wire smb format - * @share: ksmbd_share_config pointer * @src: source buffer * @maxlen: maxlen of source string - * @nls_table: nls_table pointer + * @local_nls: nls_table pointer * * Return: matching converted filename on success, otherwise error ptr */ static char * -smb2_get_name(struct ksmbd_share_config *share, const char *src, - const int maxlen, struct nls_table *local_nls) +smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) { char *name; @@ -1303,7 +1306,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, int sz, rc; ksmbd_debug(SMB, "negotiate phase\n"); - rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess); + rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn); if (rc) return rc; @@ -1313,7 +1316,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, memset(chgblob, 0, sizeof(struct challenge_message)); if (!work->conn->use_spnego) { - sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess); + sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn); if (sz < 0) return -ENOMEM; @@ -1329,7 +1332,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, return -ENOMEM; chgblob = (struct challenge_message *)neg_blob; - sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess); + sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn); if (sz < 0) { rc = -ENOMEM; goto out; @@ -1450,60 +1453,62 @@ static int ntlm_authenticate(struct ksmbd_work *work) ksmbd_free_user(user); return 0; } - ksmbd_free_user(sess->user); - } - sess->user = user; - if (user_guest(sess->user)) { - if (conn->sign) { - ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n"); + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_free_user(user); return -EPERM; } + ksmbd_free_user(user); + } else { + sess->user = user; + } + if (user_guest(sess->user)) { rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE; } else { struct authenticate_message *authblob; authblob = user_authblob(conn, req); sz = le16_to_cpu(req->SecurityBufferLength); - rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess); + rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess); if (rc) { set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD); ksmbd_debug(SMB, "authentication failed\n"); return -EPERM; } + } + /* + * If session state is SMB2_SESSION_VALID, We can assume + * that it is reauthentication. And the user/password + * has been verified, so return it here. + */ + if (sess->state == SMB2_SESSION_VALID) { + if (conn->binding) + goto binding_session; + return 0; + } + + if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE && + (conn->sign || server_conf.enforced_signing)) || + (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) + sess->sign = true; + + if (smb3_encryption_negotiated(conn) && + !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { + rc = conn->ops->generate_encryptionkey(sess); + if (rc) { + ksmbd_debug(SMB, + "SMB3 encryption key generation failed\n"); + return -EINVAL; + } + sess->enc = true; + rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; /* - * If session state is SMB2_SESSION_VALID, We can assume - * that it is reauthentication. And the user/password - * has been verified, so return it here. + * signing is disable if encryption is enable + * on this session */ - if (sess->state == SMB2_SESSION_VALID) { - if (conn->binding) - goto binding_session; - return 0; - } - - if ((conn->sign || server_conf.enforced_signing) || - (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) - sess->sign = true; - - if (smb3_encryption_negotiated(conn) && - !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { - rc = conn->ops->generate_encryptionkey(sess); - if (rc) { - ksmbd_debug(SMB, - "SMB3 encryption key generation failed\n"); - return -EINVAL; - } - sess->enc = true; - rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; - /* - * signing is disable if encryption is enable - * on this session - */ - sess->sign = false; - } + sess->sign = false; } binding_session: @@ -2057,9 +2062,6 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_debug(SMB, "request\n"); - /* Got a valid session, set connection state */ - WARN_ON(sess->conn != conn); - /* setting CifsExiting here may race with start_tcp_sess */ ksmbd_conn_set_need_reconnect(work); ksmbd_close_session_fds(work); @@ -2530,8 +2532,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } - name = smb2_get_name(share, - req->Buffer, + name = smb2_get_name(req->Buffer, le16_to_cpu(req->NameLength), work->conn->local_nls); if (IS_ERR(name)) { @@ -3392,7 +3393,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level) * @conn: connection instance * @info_level: smb information level * @d_info: structure included variables for query dir - * @user_ns: user namespace * @ksmbd_kstat: ksmbd wrapper of dirent stat information * * if directory has many entries, find first can't read it fully. @@ -4018,6 +4018,7 @@ err_out2: * buffer_check_err() - helper function to check buffer errors * @reqOutputBufferLength: max buffer length expected in command response * @rsp: query info response buffer contains output buffer length + * @rsp_org: base response buffer pointer in case of chained response * @infoclass_size: query info class response buffer size * * Return: 0 on success, otherwise error @@ -5398,8 +5399,7 @@ static int smb2_rename(struct ksmbd_work *work, goto out; } - new_name = smb2_get_name(share, - file_info->FileName, + new_name = smb2_get_name(file_info->FileName, le32_to_cpu(file_info->FileNameLength), local_nls); if (IS_ERR(new_name)) { @@ -5510,8 +5510,7 @@ static int smb2_create_link(struct ksmbd_work *work, if (!pathname) return -ENOMEM; - link_name = smb2_get_name(share, - file_info->FileName, + link_name = smb2_get_name(file_info->FileName, le32_to_cpu(file_info->FileNameLength), local_nls); if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) { @@ -5849,7 +5848,7 @@ static int set_file_mode_info(struct ksmbd_file *fp, * smb2_set_info_file() - handler for smb2 set info command * @work: smb work containing set info command buffer * @fp: ksmbd_file pointer - * @info_class: smb2 set info class + * @req: request buffer pointer * @share: ksmbd_share_config pointer * * Return: 0 on success, otherwise error @@ -6121,6 +6120,26 @@ out: return err; } +static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, + struct smb2_buffer_desc_v1 *desc, + __le32 Channel, + __le16 ChannelInfoOffset, + __le16 ChannelInfoLength) +{ + if (work->conn->dialect == SMB30_PROT_ID && + Channel != SMB2_CHANNEL_RDMA_V1) + return -EINVAL; + + if (ChannelInfoOffset == 0 || + le16_to_cpu(ChannelInfoLength) < sizeof(*desc)) + return -EINVAL; + + work->need_invalidate_rkey = + (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); + work->remote_key = le32_to_cpu(desc->token); + return 0; +} + static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work, struct smb2_read_req *req, void *data_buf, size_t length) @@ -6129,18 +6148,6 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work, (struct smb2_buffer_desc_v1 *)&req->Buffer[0]; int err; - if (work->conn->dialect == SMB30_PROT_ID && - req->Channel != SMB2_CHANNEL_RDMA_V1) - return -EINVAL; - - if (req->ReadChannelInfoOffset == 0 || - le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc)) - return -EINVAL; - - work->need_invalidate_rkey = - (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); - work->remote_key = le32_to_cpu(desc->token); - err = ksmbd_conn_rdma_write(work->conn, data_buf, length, le32_to_cpu(desc->token), le64_to_cpu(desc->offset), @@ -6162,7 +6169,7 @@ int smb2_read(struct ksmbd_work *work) struct ksmbd_conn *conn = work->conn; struct smb2_read_req *req; struct smb2_read_rsp *rsp; - struct ksmbd_file *fp; + struct ksmbd_file *fp = NULL; loff_t offset; size_t length, mincount; ssize_t nbytes = 0, remain_bytes = 0; @@ -6176,6 +6183,18 @@ int smb2_read(struct ksmbd_work *work) return smb2_read_pipe(work); } + if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || + req->Channel == SMB2_CHANNEL_RDMA_V1) { + err = smb2_set_remote_key_for_rdma(work, + (struct smb2_buffer_desc_v1 *) + &req->Buffer[0], + req->Channel, + req->ReadChannelInfoOffset, + req->ReadChannelInfoLength); + if (err) + goto out; + } + fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId), le64_to_cpu(req->PersistentFileId)); if (!fp) { @@ -6361,21 +6380,6 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work, desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0]; - if (work->conn->dialect == SMB30_PROT_ID && - req->Channel != SMB2_CHANNEL_RDMA_V1) - return -EINVAL; - - if (req->Length != 0 || req->DataOffset != 0) - return -EINVAL; - - if (req->WriteChannelInfoOffset == 0 || - le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc)) - return -EINVAL; - - work->need_invalidate_rkey = - (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); - work->remote_key = le32_to_cpu(desc->token); - data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO); if (!data_buf) return -ENOMEM; @@ -6422,6 +6426,20 @@ int smb2_write(struct ksmbd_work *work) return smb2_write_pipe(work); } + if (req->Channel == SMB2_CHANNEL_RDMA_V1 || + req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) { + if (req->Length != 0 || req->DataOffset != 0) + return -EINVAL; + err = smb2_set_remote_key_for_rdma(work, + (struct smb2_buffer_desc_v1 *) + &req->Buffer[0], + req->Channel, + req->WriteChannelInfoOffset, + req->WriteChannelInfoLength); + if (err) + goto out; + } + if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, "User does not have write permission\n"); err = -EACCES; @@ -7243,15 +7261,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, struct sockaddr_storage_rsp *sockaddr_storage; unsigned int flags; unsigned long long speed; - struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr; rtnl_lock(); for_each_netdev(&init_net, netdev) { - if (out_buf_len < - nbytes + sizeof(struct network_interface_info_ioctl_rsp)) { - rtnl_unlock(); - return -ENOSPC; - } + bool ipv4_set = false; if (netdev->type == ARPHRD_LOOPBACK) continue; @@ -7259,12 +7272,20 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, flags = dev_get_flags(netdev); if (!(flags & IFF_RUNNING)) continue; +ipv6_retry: + if (out_buf_len < + nbytes + sizeof(struct network_interface_info_ioctl_rsp)) { + rtnl_unlock(); + return -ENOSPC; + } nii_rsp = (struct network_interface_info_ioctl_rsp *) &rsp->Buffer[nbytes]; nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex); nii_rsp->Capability = 0; + if (netdev->real_num_tx_queues > 1) + nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE); if (ksmbd_rdma_capable_netdev(netdev)) nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE); @@ -7289,8 +7310,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, nii_rsp->SockAddr_Storage; memset(sockaddr_storage, 0, 128); - if (conn->peer_addr.ss_family == PF_INET || - ipv6_addr_v4mapped(&csin6->sin6_addr)) { + if (!ipv4_set) { struct in_device *idev; sockaddr_storage->Family = cpu_to_le16(INTERNETWORK); @@ -7301,6 +7321,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, continue; sockaddr_storage->addr4.IPv4address = idev_ipv4_address(idev); + nbytes += sizeof(struct network_interface_info_ioctl_rsp); + ipv4_set = true; + goto ipv6_retry; } else { struct inet6_dev *idev6; struct inet6_ifaddr *ifa; @@ -7322,9 +7345,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, break; } sockaddr_storage->addr6.ScopeId = 0; + nbytes += sizeof(struct network_interface_info_ioctl_rsp); } - - nbytes += sizeof(struct network_interface_info_ioctl_rsp); } rtnl_unlock(); diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index 4a3e4339d4c4..725b800c29c8 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -980,6 +980,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn); void init_smb2_max_read_size(unsigned int sz); void init_smb2_max_write_size(unsigned int sz); void init_smb2_max_trans_size(unsigned int sz); +void init_smb2_max_credits(unsigned int sz); bool is_smb2_neg_cmd(struct ksmbd_work *work); bool is_smb2_rsp(struct ksmbd_work *work); diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index 50590842b651..e1369b4345a9 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -365,6 +365,7 @@ struct smb_version_values { __u32 max_read_size; __u32 max_write_size; __u32 max_trans_size; + __u32 max_credits; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 1acf1892a466..3ad6881e0f7e 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) init_smb2_max_write_size(req->smb2_max_write); if (req->smb2_max_trans) init_smb2_max_trans_size(req->smb2_max_trans); + if (req->smb2_max_credits) + init_smb2_max_credits(req->smb2_max_credits); ret = ksmbd_set_netbios_name(req->netbios_name); ret |= ksmbd_set_server_string(req->server_string); diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 7e57cbb0bb35..3c1ec1ac0b27 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -34,7 +34,8 @@ #include "smbstatus.h" #include "transport_rdma.h" -#define SMB_DIRECT_PORT 5445 +#define SMB_DIRECT_PORT_IWARP 5445 +#define SMB_DIRECT_PORT_INFINIBAND 445 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) @@ -60,6 +61,10 @@ * as defined in [MS-SMBD] 3.1.1.1 * Those may change after a SMB_DIRECT negotiation */ + +/* Set 445 port to SMB Direct port by default */ +static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; + /* The local peer's maximum number of credits to grant to the peer */ static int smb_direct_receive_credit_max = 255; @@ -75,10 +80,18 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024; /* The maximum single-message size which can be received */ static int smb_direct_max_receive_size = 8192; -static int smb_direct_max_read_write_size = 1024 * 1024; +static int smb_direct_max_read_write_size = 1048512; static int smb_direct_max_outstanding_rw_ops = 8; +static LIST_HEAD(smb_direct_device_list); +static DEFINE_RWLOCK(smb_direct_device_lock); + +struct smb_direct_device { + struct ib_device *ib_dev; + struct list_head list; +}; + static struct smb_direct_listener { struct rdma_cm_id *cm_id; } smb_direct_listener; @@ -415,6 +428,7 @@ static void free_transport(struct smb_direct_transport *t) if (t->qp) { ib_drain_qp(t->qp); + ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); ib_destroy_qp(t->qp); } @@ -555,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) } t->negotiation_requested = true; t->full_packet_received = true; + enqueue_reassembly(t, recvmsg, 0); wake_up_interruptible(&t->wait_status); break; case SMB_DIRECT_MSG_DATA_TRANSFER: { @@ -1438,6 +1453,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t) free_transport(st); } +static void smb_direct_shutdown(struct ksmbd_transport *t) +{ + struct smb_direct_transport *st = smb_trans_direct_transfort(t); + + ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); + + smb_direct_disconnect_rdma_work(&st->disconnect_work); +} + static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -1581,19 +1605,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t) pr_err("error at rdma_accept: %d\n", ret); return ret; } - - wait_event_interruptible(t->wait_status, - t->status != SMB_DIRECT_CS_NEW); - if (t->status != SMB_DIRECT_CS_CONNECTED) - return -ENOTCONN; return 0; } -static int smb_direct_negotiate(struct smb_direct_transport *t) +static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) { int ret; struct smb_direct_recvmsg *recvmsg; - struct smb_direct_negotiate_req *req; recvmsg = get_free_recvmsg(t); if (!recvmsg) @@ -1603,44 +1621,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t) ret = smb_direct_post_recv(t, recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); - goto out; + goto out_err; } t->negotiation_requested = false; ret = smb_direct_accept_client(t); if (ret) { pr_err("Can't accept client\n"); - goto out; + goto out_err; } smb_direct_post_recv_credits(&t->post_recv_credits_work.work); - - ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); - ret = wait_event_interruptible_timeout(t->wait_status, - t->negotiation_requested || - t->status == SMB_DIRECT_CS_DISCONNECTED, - SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); - if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) { - ret = ret < 0 ? ret : -ETIMEDOUT; - goto out; - } - - ret = smb_direct_check_recvmsg(recvmsg); - if (ret == -ECONNABORTED) - goto out; - - req = (struct smb_direct_negotiate_req *)recvmsg->packet; - t->max_recv_size = min_t(int, t->max_recv_size, - le32_to_cpu(req->preferred_send_size)); - t->max_send_size = min_t(int, t->max_send_size, - le32_to_cpu(req->max_receive_size)); - t->max_fragmented_send_size = - le32_to_cpu(req->max_fragmented_size); - - ret = smb_direct_send_negotiate_response(t, ret); -out: - if (recvmsg) - put_recvmsg(t, recvmsg); + return 0; +out_err: + put_recvmsg(t, recvmsg); return ret; } @@ -1724,7 +1718,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t, cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES; cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; cap->max_inline_data = 0; - cap->max_rdma_ctxs = 0; + cap->max_rdma_ctxs = + rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) * + smb_direct_max_outstanding_rw_ops; return 0; } @@ -1806,6 +1802,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, { int ret; struct ib_qp_init_attr qp_attr; + int pages_per_rw; t->pd = ib_alloc_pd(t->cm_id->device, 0); if (IS_ERR(t->pd)) { @@ -1853,6 +1850,23 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, t->qp = t->cm_id->qp; t->cm_id->event_handler = smb_direct_cm_handler; + pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; + if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { + int pages_per_mr, mr_count; + + pages_per_mr = min_t(int, pages_per_rw, + t->cm_id->device->attrs.max_fast_reg_page_list_len); + mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) * + atomic_read(&t->rw_avail_ops); + ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count, + IB_MR_TYPE_MEM_REG, pages_per_mr, 0); + if (ret) { + pr_err("failed to init mr pool count %d pages %d\n", + mr_count, pages_per_mr); + goto err; + } + } + return 0; err: if (t->qp) { @@ -1877,6 +1891,49 @@ err: static int smb_direct_prepare(struct ksmbd_transport *t) { struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_recvmsg *recvmsg; + struct smb_direct_negotiate_req *req; + int ret; + + ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); + ret = wait_event_interruptible_timeout(st->wait_status, + st->negotiation_requested || + st->status == SMB_DIRECT_CS_DISCONNECTED, + SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); + if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED) + return ret < 0 ? ret : -ETIMEDOUT; + + recvmsg = get_first_reassembly(st); + if (!recvmsg) + return -ECONNABORTED; + + ret = smb_direct_check_recvmsg(recvmsg); + if (ret == -ECONNABORTED) + goto out; + + req = (struct smb_direct_negotiate_req *)recvmsg->packet; + st->max_recv_size = min_t(int, st->max_recv_size, + le32_to_cpu(req->preferred_send_size)); + st->max_send_size = min_t(int, st->max_send_size, + le32_to_cpu(req->max_receive_size)); + st->max_fragmented_send_size = + le32_to_cpu(req->max_fragmented_size); + st->max_fragmented_recv_size = + (st->recv_credit_max * st->max_recv_size) / 2; + + ret = smb_direct_send_negotiate_response(st, ret); +out: + spin_lock_irq(&st->reassembly_queue_lock); + st->reassembly_queue_length--; + list_del(&recvmsg->list); + spin_unlock_irq(&st->reassembly_queue_lock); + put_recvmsg(st, recvmsg); + + return ret; +} + +static int smb_direct_connect(struct smb_direct_transport *st) +{ int ret; struct ib_qp_cap qp_cap; @@ -1898,13 +1955,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return ret; } - ret = smb_direct_negotiate(st); + ret = smb_direct_prepare_negotiation(st); if (ret) { pr_err("Can't negotiate: %d\n", ret); return ret; } - - st->status = SMB_DIRECT_CS_CONNECTED; return 0; } @@ -1920,6 +1975,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) { struct smb_direct_transport *t; + int ret; if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { ksmbd_debug(RDMA, @@ -1932,18 +1988,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) if (!t) return -ENOMEM; + ret = smb_direct_connect(t); + if (ret) + goto out_err; + KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop, KSMBD_TRANS(t)->conn, "ksmbd:r%u", - SMB_DIRECT_PORT); + smb_direct_port); if (IS_ERR(KSMBD_TRANS(t)->handler)) { - int ret = PTR_ERR(KSMBD_TRANS(t)->handler); - + ret = PTR_ERR(KSMBD_TRANS(t)->handler); pr_err("Can't start thread\n"); - free_transport(t); - return ret; + goto out_err; } return 0; +out_err: + free_transport(t); + return ret; } static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, @@ -2007,12 +2068,65 @@ err: return ret; } +static int smb_direct_ib_client_add(struct ib_device *ib_dev) +{ + struct smb_direct_device *smb_dev; + + /* Set 5445 port if device type is iWARP(No IB) */ + if (ib_dev->node_type != RDMA_NODE_IB_CA) + smb_direct_port = SMB_DIRECT_PORT_IWARP; + + if (!ib_dev->ops.get_netdev || + !rdma_frwr_is_supported(&ib_dev->attrs)) + return 0; + + smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL); + if (!smb_dev) + return -ENOMEM; + smb_dev->ib_dev = ib_dev; + + write_lock(&smb_direct_device_lock); + list_add(&smb_dev->list, &smb_direct_device_list); + write_unlock(&smb_direct_device_lock); + + ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name); + return 0; +} + +static void smb_direct_ib_client_remove(struct ib_device *ib_dev, + void *client_data) +{ + struct smb_direct_device *smb_dev, *tmp; + + write_lock(&smb_direct_device_lock); + list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) { + if (smb_dev->ib_dev == ib_dev) { + list_del(&smb_dev->list); + kfree(smb_dev); + break; + } + } + write_unlock(&smb_direct_device_lock); +} + +static struct ib_client smb_direct_ib_client = { + .name = "ksmbd_smb_direct_ib", + .add = smb_direct_ib_client_add, + .remove = smb_direct_ib_client_remove, +}; + int ksmbd_rdma_init(void) { int ret; smb_direct_listener.cm_id = NULL; + ret = ib_register_client(&smb_direct_ib_client); + if (ret) { + pr_err("failed to ib_register_client\n"); + return ret; + } + /* When a client is running out of send credits, the credits are * granted by the server's sending a packet using this queue. * This avoids the situation that a clients cannot send packets @@ -2023,7 +2137,7 @@ int ksmbd_rdma_init(void) if (!smb_direct_wq) return -ENOMEM; - ret = smb_direct_listen(SMB_DIRECT_PORT); + ret = smb_direct_listen(smb_direct_port); if (ret) { destroy_workqueue(smb_direct_wq); smb_direct_wq = NULL; @@ -2036,36 +2150,67 @@ int ksmbd_rdma_init(void) return 0; } -int ksmbd_rdma_destroy(void) +void ksmbd_rdma_destroy(void) { - if (smb_direct_listener.cm_id) - rdma_destroy_id(smb_direct_listener.cm_id); + if (!smb_direct_listener.cm_id) + return; + + ib_unregister_client(&smb_direct_ib_client); + rdma_destroy_id(smb_direct_listener.cm_id); + smb_direct_listener.cm_id = NULL; if (smb_direct_wq) { destroy_workqueue(smb_direct_wq); smb_direct_wq = NULL; } - return 0; } bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { - struct ib_device *ibdev; + struct smb_direct_device *smb_dev; + int i; bool rdma_capable = false; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); - if (ibdev) { - if (rdma_frwr_is_supported(&ibdev->attrs)) - rdma_capable = true; - ib_device_put(ibdev); + read_lock(&smb_direct_device_lock); + list_for_each_entry(smb_dev, &smb_direct_device_list, list) { + for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { + struct net_device *ndev; + + ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev, + i + 1); + if (!ndev) + continue; + + if (ndev == netdev) { + dev_put(ndev); + rdma_capable = true; + goto out; + } + dev_put(ndev); + } } +out: + read_unlock(&smb_direct_device_lock); + + if (rdma_capable == false) { + struct ib_device *ibdev; + + ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); + if (ibdev) { + if (rdma_frwr_is_supported(&ibdev->attrs)) + rdma_capable = true; + ib_device_put(ibdev); + } + } + return rdma_capable; } static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, + .shutdown = smb_direct_shutdown, .writev = smb_direct_writev, .read = smb_direct_read, .rdma_read = smb_direct_rdma_read, diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h index 0fa8adc0776f..5567d93a6f96 100644 --- a/fs/ksmbd/transport_rdma.h +++ b/fs/ksmbd/transport_rdma.h @@ -7,8 +7,6 @@ #ifndef __KSMBD_TRANSPORT_RDMA_H__ #define __KSMBD_TRANSPORT_RDMA_H__ -#define SMB_DIRECT_PORT 5445 - /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */ struct smb_direct_negotiate_req { __le16 min_version; @@ -52,7 +50,7 @@ struct smb_direct_data_transfer { #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); -int ksmbd_rdma_destroy(void); +void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); #else static inline int ksmbd_rdma_init(void) { return 0; } diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index c14320e03b69..82a1429bbe12 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -404,7 +404,7 @@ static int create_socket(struct interface *iface) &ksmbd_socket); if (ret) { pr_err("Can't create socket for ipv4: %d\n", ret); - goto out_error; + goto out_clear; } sin.sin_family = PF_INET; @@ -462,6 +462,7 @@ static int create_socket(struct interface *iface) out_error: tcp_destroy_socket(ksmbd_socket); +out_clear: iface->ksmbd_socket = NULL; return ret; } diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h index 448576fbe4b7..36239ce31afd 100644 --- a/fs/ksmbd/vfs_cache.h +++ b/fs/ksmbd/vfs_cache.h @@ -96,16 +96,6 @@ struct ksmbd_file { int durable_timeout; - /* for SMB1 */ - int pid; - - /* conflict lock fail count for SMB1 */ - unsigned int cflock_cnt; - /* last lock failure start offset for SMB1 */ - unsigned long long llock_fstart; - - int dirent_offset; - /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index c43877c8a279..505533c43a92 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -92,21 +92,6 @@ struct getbmapx { #define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */ #define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */ -/* - * Structure for XFS_IOC_FSSETDM. - * For use by backup and restore programs to set the XFS on-disk inode - * fields di_dmevmask and di_dmstate. These must be set to exactly and - * only values previously obtained via xfs_bulkstat! (Specifically the - * struct xfs_bstat fields bs_dmevmask and bs_dmstate.) - */ -#ifndef HAVE_FSDMIDATA -struct fsdmidata { - __u32 fsd_dmevmask; /* corresponds to di_dmevmask */ - __u16 fsd_padding; - __u16 fsd_dmstate; /* corresponds to di_dmstate */ -}; -#endif - /* * File segment locking set data type for 64 bit access. * Also used for all the RESV/FREE interfaces. @@ -562,16 +547,10 @@ typedef struct xfs_fsop_handlereq { /* * Compound structures for passing args through Handle Request interfaces - * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle - * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and - * XFS_IOC_ATTRMULTI_BY_HANDLE + * xfs_attrlist_by_handle, xfs_attrmulti_by_handle + * - ioctls: XFS_IOC_ATTRLIST_BY_HANDLE, and XFS_IOC_ATTRMULTI_BY_HANDLE */ -typedef struct xfs_fsop_setdm_handlereq { - struct xfs_fsop_handlereq hreq; /* handle information */ - struct fsdmidata __user *data; /* DMAPI data */ -} xfs_fsop_setdm_handlereq_t; - /* * Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface. * @@ -781,15 +760,15 @@ struct xfs_scrub_metadata { * For 'documentation' purposed more than anything else, * the "cmd #" field reflects the IRIX fcntl number. */ -#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) -#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) +/* XFS_IOC_ALLOCSP ------- deprecated 10 */ +/* XFS_IOC_FREESP -------- deprecated 11 */ #define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) #define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR #define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR -#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) -#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) +/* XFS_IOC_ALLOCSP64 ----- deprecated 36 */ +/* XFS_IOC_FREESP64 ------ deprecated 37 */ #define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) -#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata) +/* XFS_IOC_FSSETDM ------- deprecated 39 */ #define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64) #define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64) #define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64) @@ -831,7 +810,7 @@ struct xfs_scrub_metadata { #define XFS_IOC_FREEZE _IOWR('X', 119, int) /* aka FIFREEZE */ #define XFS_IOC_THAW _IOWR('X', 120, int) /* aka FITHAW */ -#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq) +/* XFS_IOC_FSSETDM_BY_HANDLE -- deprecated 121 */ #define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq) #define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq) #define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 797ea0c8b14e..d4a387d3d0ce 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -771,8 +771,7 @@ int xfs_alloc_file_space( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, - int alloc_type) + xfs_off_t len) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; @@ -865,8 +864,8 @@ xfs_alloc_file_space( goto error; error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, alloc_type, 0, imapp, - &nimaps); + allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, + &nimaps); if (error) goto error; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 9f993168b55b..24b37d211f1d 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -54,7 +54,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip, /* preallocation and hole punch interface */ int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, int alloc_type); + xfs_off_t len); int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d4c5ca261bd..22ad207bedf4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1051,8 +1051,7 @@ xfs_file_fallocate( } if (!xfs_is_always_cow_inode(ip)) { - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); + error = xfs_alloc_file_space(ip, offset, len); if (error) goto out_unlock; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 8ea47a9d5aad..03a6198c97f6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -627,87 +627,6 @@ xfs_attrmulti_by_handle( return error; } -int -xfs_ioc_space( - struct file *filp, - xfs_flock64_t *bf) -{ - struct inode *inode = file_inode(filp); - struct xfs_inode *ip = XFS_I(inode); - struct iattr iattr; - enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR; - uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; - int error; - - if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -EPERM; - - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - - if (xfs_is_always_cow_inode(ip)) - return -EOPNOTSUPP; - - if (filp->f_flags & O_DSYNC) - flags |= XFS_PREALLOC_SYNC; - if (filp->f_mode & FMODE_NOCMTIME) - flags |= XFS_PREALLOC_INVISIBLE; - - error = mnt_want_write_file(filp); - if (error) - return error; - - xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); - if (error) - goto out_unlock; - inode_dio_wait(inode); - - switch (bf->l_whence) { - case 0: /*SEEK_SET*/ - break; - case 1: /*SEEK_CUR*/ - bf->l_start += filp->f_pos; - break; - case 2: /*SEEK_END*/ - bf->l_start += XFS_ISIZE(ip); - break; - default: - error = -EINVAL; - goto out_unlock; - } - - if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) { - error = -EINVAL; - goto out_unlock; - } - - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), - XFS_BMAPI_PREALLOC); - if (error) - goto out_unlock; - } - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = bf->l_start; - error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp), - &iattr); - if (error) - goto out_unlock; - - error = xfs_update_prealloc_flags(ip, flags); - -out_unlock: - xfs_iunlock(ip, iolock); - mnt_drop_write_file(filp); - return error; -} - /* Return 0 on success or positive error */ int xfs_fsbulkstat_one_fmt( @@ -1935,6 +1854,15 @@ xfs_fs_eofblocks_from_user( return 0; } +/* + * These long-unused ioctls were removed from the official ioctl API in 5.17, + * but retain these definitions so that we can log warnings about them. + */ +#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) +#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) +#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) +#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) + /* * Note: some of the ioctl's return positive numbers as a * byte count indicating success, such as readlink_by_handle. @@ -1965,13 +1893,11 @@ xfs_file_ioctl( case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: { - xfs_flock64_t bf; - - if (copy_from_user(&bf, arg, sizeof(bf))) - return -EFAULT; - return xfs_ioc_space(filp, &bf); - } + case XFS_IOC_FREESP64: + xfs_warn_once(mp, + "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported", + current->comm); + return -ENOTTY; case XFS_IOC_DIOINFO: { struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct dioattr da; diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 845d3bcab74b..d4abba2c13c1 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -10,12 +10,6 @@ struct xfs_bstat; struct xfs_ibulk; struct xfs_inogrp; - -extern int -xfs_ioc_space( - struct file *filp, - xfs_flock64_t *bf); - int xfs_ioc_swapext( xfs_swapext_t *sxp); diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 8783af203cfc..004ed2a251e8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -27,22 +27,6 @@ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) #ifdef BROKEN_X86_ALIGNMENT -STATIC int -xfs_compat_flock64_copyin( - xfs_flock64_t *bf, - compat_xfs_flock64_t __user *arg32) -{ - if (get_user(bf->l_type, &arg32->l_type) || - get_user(bf->l_whence, &arg32->l_whence) || - get_user(bf->l_start, &arg32->l_start) || - get_user(bf->l_len, &arg32->l_len) || - get_user(bf->l_sysid, &arg32->l_sysid) || - get_user(bf->l_pid, &arg32->l_pid) || - copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) - return -EFAULT; - return 0; -} - STATIC int xfs_compat_ioc_fsgeometry_v1( struct xfs_mount *mp, @@ -445,17 +429,6 @@ xfs_file_compat_ioctl( switch (cmd) { #if defined(BROKEN_X86_ALIGNMENT) - case XFS_IOC_ALLOCSP_32: - case XFS_IOC_FREESP_32: - case XFS_IOC_ALLOCSP64_32: - case XFS_IOC_FREESP64_32: { - struct xfs_flock64 bf; - - if (xfs_compat_flock64_copyin(&bf, arg)) - return -EFAULT; - cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - return xfs_ioc_space(filp, &bf); - } case XFS_IOC_FSGEOMETRY_V1_32: return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg); case XFS_IOC_FSGROWFSDATA_32: { diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 9929482bf358..fc5a91f3a5e0 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -154,10 +154,6 @@ typedef struct compat_xfs_flock64 { __s32 l_pad[4]; /* reserve area */ } compat_xfs_flock64_t; -#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) -#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) -#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) -#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) #define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e947cd91152..fa517ae604ad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -316,7 +316,12 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_RDONLY, + /* MEM was "allocated" from a different helper, and cannot be mixed + * with regular non-MEM_ALLOC'ed MEM types. + */ + MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_ALLOC, }; /* Max number of base types. */ @@ -400,7 +405,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 143401d4c9d9..e9993172f892 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 309acbcb5a8a..6a89ea410e43 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -295,12 +295,13 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); +extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid); struct fs_parameter; struct fc_log; struct ceph_options *ceph_alloc_options(void); int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, - struct fc_log *l); + struct fc_log *l, char delim); int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, struct fc_log *l); int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 0e6e9ad3c3bf..ff99ce094cfa 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); extern int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, - int max_count, int *count); + int max_count, int *count, char delim); extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 48cc5795ceda..63540be0fc34 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..0a4779175a52 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ebef45e821af..676cb8ea9e15 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). + */ exts->net = net; - netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL); exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c11dbac5abb2..472843eedbae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1244,6 +1244,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index dae5df88ab10..0425cd79af9a 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -6,6 +6,7 @@ #define AFFS_SUPER_MAGIC 0xadff #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 +#define CEPH_SUPER_MAGIC 0x00c36400 #define CODA_SUPER_MAGIC 0x73757245 #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ #define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 33bb8ae4a804..e16dafeb2450 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, i, btf_type_str(t)); return -EINVAL; } - if (check_ctx_reg(env, reg, regno)) + if (check_ptr_off_reg(env, reg, regno)) return -EINVAL; } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) { const struct btf_type *reg_ref_t; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 80da1db47c68..5a8d9f7467bf 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) int opt; opt = fs_parse(fc, bpf_fs_parameters, param, &result); - if (opt < 0) + if (opt < 0) { /* We might like to report bad mount options here, but * traditionally we've ignored all mount options, so we'd * better continue to ignore non-existing options for bpf. */ - return opt == -ENOPARAM ? 0 : opt; + if (opt == -ENOPARAM) { + opt = vfs_parse_fs_param_source(fc, param); + if (opt != -ENOPARAM) + return opt; + + return 0; + } + + if (opt < 0) + return opt; + } switch (opt) { case OPT_MODE: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bfb45381fb3f..a39eedecc93a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -570,6 +570,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env, if (type & MEM_RDONLY) strncpy(prefix, "rdonly_", 16); + if (type & MEM_ALLOC) + strncpy(prefix, "alloc_", 16); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -616,7 +618,7 @@ static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) { - env->scratched_stack_slots |= 1UL << spi; + env->scratched_stack_slots |= 1ULL << spi; } static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) @@ -637,14 +639,14 @@ static bool verifier_state_scratched(const struct bpf_verifier_env *env) static void mark_verifier_state_clean(struct bpf_verifier_env *env) { env->scratched_regs = 0U; - env->scratched_stack_slots = 0UL; + env->scratched_stack_slots = 0ULL; } /* Used for printing the entire verifier state. */ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) { env->scratched_regs = ~0U; - env->scratched_stack_slots = ~0UL; + env->scratched_stack_slots = ~0ULL; } /* The reg state of a pointer or a bounded scalar was saved when @@ -3969,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) +static int __check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + bool fixed_off_ok) { - /* Access to ctx or passing it to a helper is only allowed in - * its original, unmodified form. + /* Access to this pointer-typed register or passing it to a helper + * is only allowed in its original, unmodified form. */ - if (reg->off) { - verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", - regno, reg->off); + if (!fixed_off_ok && reg->off) { + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); return -EACCES; } @@ -3986,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); + verbose(env, "variable %s access var_off=%s disallowed\n", + reg_type_str(env, reg->type), tn_buf); return -EACCES; } return 0; } +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + return __check_ptr_off_reg(env, reg, regno, false); +} + static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -4437,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_ctx_reg(env, reg, regno); + err = check_ptr_off_reg(env, reg, regno); if (err < 0) return err; @@ -5127,6 +5137,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, + PTR_TO_MEM | MEM_ALLOC, PTR_TO_BUF, }, }; @@ -5144,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5244,12 +5255,6 @@ found: kernel_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } - - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", - regno); - return -EACCES; - } } return 0; @@ -5304,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (err) return err; - if (type == PTR_TO_CTX) { - err = check_ctx_reg(env, reg, regno); + switch ((u32)type) { + case SCALAR_VALUE: + /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_MEM | MEM_RDONLY: + case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_BUF: + case PTR_TO_BUF | MEM_RDONLY: + case PTR_TO_STACK: + /* Some of the argument types nevertheless require a + * zero register offset. + */ + if (arg_type == ARG_PTR_TO_ALLOC_MEM) + goto force_off_check; + break; + /* All the rest must be rejected: */ + default: +force_off_check: + err = __check_ptr_off_reg(env, reg, regno, + type == PTR_TO_BTF_ID); if (err < 0) return err; + break; } skip_type_check: @@ -9507,9 +9535,13 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } - if (insn->src_reg == BPF_PSEUDO_BTF_ID) { - mark_reg_known_zero(env, regs, insn->dst_reg); + /* All special src_reg cases are listed below. From this point onwards + * we either succeed and assign a corresponding dst_reg->type after + * zeroing the offset, or fail and reject the program. + */ + mark_reg_known_zero(env, regs, insn->dst_reg); + if (insn->src_reg == BPF_PSEUDO_BTF_ID) { dst_reg->type = aux->btf_var.reg_type; switch (base_type(dst_reg->type)) { case PTR_TO_MEM: @@ -9547,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) } map = env->used_maps[aux->map_index]; - mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || @@ -9651,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); + err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg); if (err < 0) return err; diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index 0ae2e66dcf0f..a6789c0c626b 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -69,9 +69,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, unsigned long entries[REF_TRACKER_STACK_ENTRIES]; struct ref_tracker *tracker; unsigned int nr_entries; + gfp_t gfp_mask = gfp; unsigned long flags; - *trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL); + if (gfp & __GFP_DIRECT_RECLAIM) + gfp_mask |= __GFP_NOFAIL; + *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask); if (unlikely(!tracker)) { pr_err_once("memory allocation failure, unreliable refcount tracker.\n"); refcount_inc(&dir->untracked); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a52ad81596b7..55f47cadb114 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { br_multicast_del_port(p); + dev_put_track(dev, &p->dev_tracker); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -724,10 +725,10 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: br_multicast_del_port(p); + dev_put_track(dev, &p->dev_tracker); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: - dev_put(dev); return err; } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 9441b4a4912b..ecc400a0b7bb 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -190,14 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt, } EXPORT_SYMBOL(ceph_compare_options); -static int parse_fsid(const char *str, struct ceph_fsid *fsid) +int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid) { int i = 0; char tmp[3]; int err = -EINVAL; int d; - dout("parse_fsid '%s'\n", str); + dout("%s '%s'\n", __func__, str); tmp[2] = 0; while (*str && i < 16) { if (ispunct(*str)) { @@ -217,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid) if (i == 16) err = 0; - dout("parse_fsid ret %d got fsid %pU\n", err, fsid); + dout("%s ret %d got fsid %pU\n", __func__, err, fsid); return err; } +EXPORT_SYMBOL(ceph_parse_fsid); /* * ceph options @@ -395,14 +396,14 @@ out: } int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, - struct fc_log *l) + struct fc_log *l, char delim) { struct p_log log = {.prefix = "libceph", .log = l}; int ret; - /* ip1[:port1][,ip2[:port2]...] */ + /* ip1[:port1][ip2[:port2]...] */ ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON, - &opt->num_mon); + &opt->num_mon, delim); if (ret) { error_plog(&log, "Failed to parse monitor IPs: %d", ret); return ret; @@ -428,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_ip: err = ceph_parse_ips(param->string, param->string + param->size, - &opt->my_addr, - 1, NULL); + &opt->my_addr, 1, NULL, ','); if (err) { error_plog(&log, "Failed to parse ip: %d", err); return err; @@ -438,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, break; case Opt_fsid: - err = parse_fsid(param->string, &opt->fsid); + err = ceph_parse_fsid(param->string, &opt->fsid); if (err) { error_plog(&log, "Failed to parse fsid: %d", err); return err; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 7b891be799d2..45eba2dcb67a 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen, */ int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, - int max_count, int *count) + int max_count, int *count, char delim) { int i, ret = -EINVAL; const char *p = c; dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { + char cur_delim = delim; const char *ipend; int port; - char delim = ','; if (*p == '[') { - delim = ']'; + cur_delim = ']'; p++; } - ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); + ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim, + &ipend); if (ret) goto bad; ret = -EINVAL; p = ipend; - if (delim == ']') { + if (cur_delim == ']') { if (*p != ']') { dout("missing matching ']'\n"); goto bad; @@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end, addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; addr[i].nonce = 0; - dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); + dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i])); if (p == end) break; - if (*p != ',') + if (*p != delim) goto bad; p++; } diff --git a/net/core/dev.c b/net/core/dev.c index 37fe87c1e8dd..674bb8d1a95f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8988,6 +8988,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } old_prog = link->prog; + if (old_prog->type != new_prog->type || + old_prog->expected_attach_type != new_prog->expected_attach_type) { + err = -EINVAL; + goto out_unlock; + } + if (old_prog == new_prog) { /* no-op, don't disturb drivers */ bpf_prog_put(new_prog); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9b7171c40434..a5b5bb99c644 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -164,8 +164,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); diff --git a/net/core/of_net.c b/net/core/of_net.c index 95a64c813ae5..f1a9bf7578e7 100644 --- a/net/core/of_net.c +++ b/net/core/of_net.c @@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) { struct platform_device *pdev = of_find_device_by_node(np); struct nvmem_cell *cell; - const void *buf; + const void *mac; size_t len; int ret; @@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) if (IS_ERR(cell)) return PTR_ERR(cell); - buf = nvmem_cell_read(cell, &len); + mac = nvmem_cell_read(cell, &len); nvmem_cell_put(cell); - if (IS_ERR(buf)) - return PTR_ERR(buf); + if (IS_ERR(mac)) + return PTR_ERR(mac); - ret = 0; - if (len == ETH_ALEN) { - if (is_valid_ether_addr(buf)) - memcpy(addr, buf, ETH_ALEN); - else - ret = -EINVAL; - } else if (len == 3 * ETH_ALEN - 1) { - u8 mac[ETH_ALEN]; - - if (mac_pton(buf, mac)) - memcpy(addr, mac, ETH_ALEN); - else - ret = -EINVAL; - } else { - ret = -EINVAL; + if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { + kfree(mac); + return -EINVAL; } - kfree(buf); + memcpy(addr, mac, ETH_ALEN); + kfree(mac); - return ret; + return 0; } /** diff --git a/net/core/sock.c b/net/core/sock.c index e21485ab285d..4ff806d71921 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -844,6 +844,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) } num = ethtool_get_phc_vclocks(dev, &vclock_index); + dev_put(dev); + for (i = 0; i < num; i++) { if (*(vclock_index + i) == phc_index) { match = true; @@ -2047,6 +2049,9 @@ void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); + WARN_ON_ONCE(!llist_empty(&sk->defer_list)); + sk_defer_free_flush(sk); + if (rcu_access_pointer(sk->sk_reuseport_cb)) { reuseport_detach_sock(sk); use_call_rcu = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 828de171708f..b4589861b84c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_info_hash_size; +static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi) pr_warn("Freeing alive fib_info %p\n", fi); return; } - fib_info_cnt--; call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi) spin_lock_bh(&fib_info_lock); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); + + /* Paired with READ_ONCE() in fib_create_info(). */ + WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { @@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; + + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -430,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -1240,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, return err; } -static inline unsigned int fib_laddr_hashfn(__be32 val) +static struct hlist_head * +fib_info_laddrhash_bucket(const struct net *net, __be32 val) { - unsigned int mask = (fib_info_hash_size - 1); + u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, + fib_info_hash_bits); - return ((__force u32)val ^ - ((__force u32)val >> 7) ^ - ((__force u32)val >> 14)) & mask; + return &fib_info_laddrhash[slot]; } static struct hlist_head *fib_info_hash_alloc(int bytes) @@ -1282,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_info_hash_size = new_size; + fib_info_hash_bits = ilog2(new_size); for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -1299,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, } fib_info_hash = new_info_hash; + fib_info_laddrhash = new_laddrhash; for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &fib_info_laddrhash[i]; + struct hlist_head *lhead = &old_laddrhash[i]; struct hlist_node *n; struct fib_info *fi; hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; - unsigned int new_hash; - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); - ldest = &new_laddrhash[new_hash]; + ldest = fib_info_laddrhash_bucket(fi->fib_net, + fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, ldest); } } - fib_info_laddrhash = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -1430,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_info_hash_size) { + + /* Paired with WRITE_ONCE() in fib_release_info() */ + if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; @@ -1462,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, return ERR_PTR(err); } - fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; @@ -1591,12 +1600,13 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); + fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { struct hlist_head *head; - head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; + head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, head); } if (fi->nh) { @@ -1604,12 +1614,10 @@ link_it: } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1870,16 +1878,16 @@ nla_put_failure: */ int fib_sync_down_addr(struct net_device *dev, __be32 local) { - int ret = 0; - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); + struct hlist_head *head; struct fib_info *fi; + int ret = 0; if (!fib_info_laddrhash || local == 0) return 0; + head = fib_info_laddrhash_bucket(net, local); hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net) || fi->fib_tb_id != tb_id) @@ -1961,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1981,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2131,7 +2137,6 @@ out: int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2147,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 05cd198d7a6b..341096807100 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94e..fad803d2d711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ac2b95c5694..99db2e41ed10 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -604,8 +604,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a618dce7e0bc..c0b138c20992 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } - if (rt->rt_type != RTN_UNICAST) { + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; goto tx_error_icmp; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 86ad15abf897..750f9f9b4daf 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_route **rtp, struct socket **sockp) { - struct sockaddr_mctp addr; + struct sockaddr_mctp addr = {0}; struct mctp_test_route *rt; struct mctp_test_dev *dev; struct socket *sock; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 58dcafe8bf79..7d00a1452b1d 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -206,7 +206,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) struct nft_connlimit *priv_src = nft_expr_priv(src); priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); - if (priv_dst->list) + if (!priv_dst->list) return -ENOMEM; nf_conncount_list_init(priv_dst->list); diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 5ee33d0ccd4e..4f745a409d34 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -106,7 +106,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) struct nft_last_priv *priv_dst = nft_expr_priv(dst); priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); - if (priv_dst->last) + if (!priv_dst->last) return -ENOMEM; return 0; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index f04be5be73a0..c4f308460dd1 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -145,7 +145,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst, priv_dst->invert = priv_src->invert; priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC); - if (priv_dst->limit) + if (!priv_dst->limit) return -ENOMEM; spin_lock_init(&priv_dst->limit->lock); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0484aef74273..f394a0b562f6 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -237,7 +237,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) struct nft_quota *priv_dst = nft_expr_priv(dst); priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); - if (priv_dst->consumed) + if (!priv_dst->consumed) return -ENOMEM; atomic64_set(priv_dst->consumed, 0); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 6cfd30fc0798..0b93a17b9f11 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + if (!llcp_sock->local) { + release_sock(sk); + return -ENODEV; + } + if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c9c6f49f9c28..2cb496c84878 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_offload_graft_root(dev, new, old, extack); - if (new && new->ops->attach) + if (new && new->ops->attach && !ingress) goto skip; for (i = 0; i < num_q; i++) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b07bd1c7330f..f893d9a81b01 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1529,6 +1529,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index aa3bcaaeabf7..961854e56736 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -634,9 +634,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; + bool lgr_valid = false; + + if (smc_conn_lgr_valid(conn)) + lgr_valid = true; smc_conn_free(conn); - if (local_first) + if (local_first && lgr_valid) smc_lgr_cleanup_early(lgr); } diff --git a/net/smc/smc.h b/net/smc/smc.h index 1a4fc1c6c4ab..3d0b8e300deb 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -221,6 +221,7 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ }; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 84c8a4374fdd..9d5a97168969 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -197,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) { int rc; - if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) + if (!smc_conn_lgr_valid(conn) || + (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) return -EPIPE; if (conn->lgr->is_smcd) { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 6be95a2a7b25..ce27399b38b1 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? SMC_FIRST_CONTACT_MASK : 0; - if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && + if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8935ef4811b0..29525d03b253 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!smc_conn_lgr_valid(conn)) return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - conn->lgr = NULL; } int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) @@ -749,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, } get_device(&lnk->smcibdev->ibdev->dev); atomic_inc(&lnk->smcibdev->lnk_cnt); + refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */ + lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; + smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); @@ -806,6 +808,7 @@ out: lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold above */ return rc; } @@ -844,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->terminating = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; + refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */ mutex_init(&lgr->sndbufs_lock); mutex_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); @@ -996,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); + /* link_hold in smc_conn_create() */ + smcr_link_put(conn->lnk); conn->lnk = to_lnk; atomic_inc(&conn->lnk->conn_cnt); + /* link_put in smc_conn_free() */ + smcr_link_hold(conn->lnk); } struct smc_link *smc_switch_conns(struct smc_link_group *lgr, @@ -1130,8 +1138,19 @@ void smc_conn_free(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!lgr || conn->freed) + /* Connection has never been registered in a + * link group, or has already been freed. + */ return; + + conn->freed = 1; + if (!smc_conn_lgr_valid(conn)) + /* Connection has already unregistered from + * link group. + */ + goto lgr_put; + if (lgr->is_smcd) { if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); @@ -1148,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn) if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); +lgr_put: + if (!lgr->is_smcd) + smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */ + smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ } /* unregister a link from a buf_desc */ @@ -1203,21 +1226,11 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } } -/* must be called under lgr->llc_conf_mutex lock */ -void smcr_link_clear(struct smc_link *lnk, bool log) +static void __smcr_link_clear(struct smc_link *lnk) { + struct smc_link_group *lgr = lnk->lgr; struct smc_ib_device *smcibdev; - if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) - return; - lnk->peer_qpn = 0; - smc_llc_link_clear(lnk, log); - smcr_buf_unmap_lgr(lnk); - smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_error(lnk); - smc_wr_free_link(lnk); - smc_ib_destroy_queue_pair(lnk); - smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); smc_ibdev_cnt_dec(lnk); put_device(&lnk->smcibdev->ibdev->dev); @@ -1226,6 +1239,36 @@ void smcr_link_clear(struct smc_link *lnk, bool log) lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ +} + +/* must be called under lgr->llc_conf_mutex lock */ +void smcr_link_clear(struct smc_link *lnk, bool log) +{ + if (!lnk->lgr || lnk->clearing || + lnk->state == SMC_LNK_UNUSED) + return; + lnk->clearing = 1; + lnk->peer_qpn = 0; + smc_llc_link_clear(lnk, log); + smcr_buf_unmap_lgr(lnk); + smcr_rtoken_clear_link(lnk); + smc_ib_modify_qp_error(lnk); + smc_wr_free_link(lnk); + smc_ib_destroy_queue_pair(lnk); + smc_ib_dealloc_protection_domain(lnk); + smcr_link_put(lnk); /* theoretically last link_put */ +} + +void smcr_link_hold(struct smc_link *lnk) +{ + refcount_inc(&lnk->refcnt); +} + +void smcr_link_put(struct smc_link *lnk) +{ + if (refcount_dec_and_test(&lnk->refcnt)) + __smcr_link_clear(lnk); } static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -1290,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) __smc_lgr_free_bufs(lgr, true); } +/* won't be freed until no one accesses to lgr anymore */ +static void __smc_lgr_free(struct smc_link_group *lgr) +{ + smc_lgr_free_bufs(lgr); + if (lgr->is_smcd) { + if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) + wake_up(&lgr->smcd->lgrs_deleted); + } else { + smc_wr_free_lgr_mem(lgr); + if (!atomic_dec_return(&lgr_cnt)) + wake_up(&lgrs_deleted); + } + kfree(lgr); +} + /* remove a link group */ static void smc_lgr_free(struct smc_link_group *lgr) { @@ -1305,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr) smc_llc_lgr_clear(lgr); } - smc_lgr_free_bufs(lgr); destroy_workqueue(lgr->tx_wq); if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); - if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) - wake_up(&lgr->smcd->lgrs_deleted); - } else { - smc_wr_free_lgr_mem(lgr); - if (!atomic_dec_return(&lgr_cnt)) - wake_up(&lgrs_deleted); } - kfree(lgr); + smc_lgr_put(lgr); /* theoretically last lgr_put */ +} + +void smc_lgr_hold(struct smc_link_group *lgr) +{ + refcount_inc(&lgr->refcnt); +} + +void smc_lgr_put(struct smc_link_group *lgr) +{ + if (refcount_dec_and_test(&lgr->refcnt)) + __smc_lgr_free(lgr); } static void smc_sk_wake_ups(struct smc_sock *smc) @@ -1469,16 +1531,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. - * - * We must wait here for QPs been destroyed before we destroy the CQs, - * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus - * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); - LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1490,7 +1547,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - list_move_tail(&lgr->list, &lgr_linkdown_list); + smcr_link_down_cond_sched(&lgr->lnk[i]); } } } @@ -1502,16 +1559,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } - list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].smcibdev == smcibdev) { - mutex_lock(&lgr->llc_conf_mutex); - smcr_link_down_cond(&lgr->lnk[i]); - mutex_unlock(&lgr->llc_conf_mutex); - } - } - } - if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1856,6 +1903,10 @@ create: goto out; } } + smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */ + if (!conn->lgr->is_smcd) + smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */ + conn->freed = 0; conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; @@ -2240,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -2256,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) @@ -2270,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 521c64a3d8d3..4cb03e942364 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -137,6 +137,8 @@ struct smc_link { u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? */ + u8 clearing : 1; /* link is being cleared */ + refcount_t refcnt; /* link reference count */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */ @@ -249,6 +251,7 @@ struct smc_link_group { u8 terminating : 1;/* lgr is terminating */ u8 freeing : 1; /* lgr is being freed */ + refcount_t refcnt; /* lgr reference count */ bool is_smcd; /* SMC-R or SMC-D */ u8 smc_version; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -409,6 +412,11 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } +static inline bool smc_conn_lgr_valid(struct smc_connection *conn) +{ + return conn->lgr && conn->alert_token_local; +} + /* * Returns true if the specified link is usable. * @@ -487,6 +495,8 @@ struct smc_clc_msg_accept_confirm; void smc_lgr_cleanup_early(struct smc_link_group *lgr); void smc_lgr_terminate_sched(struct smc_link_group *lgr); +void smc_lgr_hold(struct smc_link_group *lgr); +void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, @@ -518,6 +528,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smcr_link_hold(struct smc_link *lnk); +void smcr_link_put(struct smc_link *lnk); void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 7c8dad28c18d..b8898c787d23 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, r->diag_state = sk->sk_state; if (smc->use_fallback) r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; - else if (smc->conn.lgr && smc->conn.lgr->is_smcd) + else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd) r->diag_mode = SMC_DIAG_MODE_SMCD; else r->diag_mode = SMC_DIAG_MODE_SMCR; @@ -142,7 +142,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if (smc->conn.lgr && !smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; @@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } - if (smc->conn.lgr && smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_connection *conn = &smc->conn; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index db9825c01e0a..291f1484a1b7 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -369,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); new_pe->ndev = ndev; - netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); + if (ndev) + netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); rc = -EEXIST; new_netdev = true; write_lock(&pnettable->lock); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 47512ccce5ef..a54e90a1110f 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -125,10 +125,6 @@ int smc_wr_tx_v2_send(struct smc_link *link, int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data); void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 95e774f1b91f..efc84845bb6b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2059,6 +2059,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, splice_read_end: release_sock(sk); + sk_defer_free_flush(sk); return copied ? : err; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f..d45d5366115a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 052ae709ce28..aa27a02478dc 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dccb8f3318ef..04d1ce9b510f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -3295,7 +3296,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 32fc5b3b5cf6..911345c526e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -10,6 +10,7 @@ #include "test_d_path.skel.h" #include "test_d_path_check_rdonly_mem.skel.h" +#include "test_d_path_check_types.skel.h" static int duration; @@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem(void) test_d_path_check_rdonly_mem__destroy(skel); } +static void test_d_path_check_types(void) +{ + struct test_d_path_check_types *skel; + + skel = test_d_path_check_types__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type"); + + test_d_path_check_types__destroy(skel); +} + void test_d_path(void) { if (test__start_subtest("basic")) @@ -174,4 +185,7 @@ void test_d_path(void) if (test__start_subtest("check_rdonly_mem")) test_d_path_check_rdonly_mem(); + + if (test__start_subtest("check_alloc_mem")) + test_d_path_check_types(); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 983ab0b47d30..b2b357f8c74c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -8,46 +8,47 @@ void serial_test_xdp_link(void) { - __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; + __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; + int err; __u32 link_info_len = sizeof(link_info); __u32 prog_info_len = sizeof(prog_info); skel1 = test_xdp_link__open_and_load(); - if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel1, "skel_load")) goto cleanup; prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); skel2 = test_xdp_link__open_and_load(); - if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel2, "skel_load")) goto cleanup; prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info1", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info1")) goto cleanup; id1 = prog_info.id; memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info2", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info2")) goto cleanup; id2 = prog_info.id; /* set initial prog attachment */ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fd_attach")) goto cleanup; /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err); + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) + goto cleanup; /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); @@ -62,7 +63,7 @@ void serial_test_xdp_link(void) /* detach BPF program */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "prog_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "prog_detach")) goto cleanup; /* now BPF link should attach successfully */ @@ -73,24 +74,23 @@ void serial_test_xdp_link(void) /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); - if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_attach_fail")) goto cleanup; /* Can't force-update when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); - if (CHECK(!err, "prog_update_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_update_fail")) goto cleanup; /* Can't force-detach when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); - if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_detach_fail")) goto cleanup; /* BPF link is not allowed to replace another BPF link */ @@ -110,40 +110,39 @@ void serial_test_xdp_link(void) skel2->links.xdp_handler = link; err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id2, "id2_check", - "loaded prog id %u != id2 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) goto cleanup; /* updating program under active BPF link works as expected */ err = bpf_link__update_program(link, skel1->progs.xdp_handler); - if (CHECK(err, "link_upd", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_upd")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_info")) goto cleanup; - CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", - "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); - CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type"); + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); + + /* updating program under active BPF link with different type fails */ + err = bpf_link__update_program(link, skel1->progs.tc_handler); + if (!ASSERT_ERR(err, "link_upd_invalid")) + goto cleanup; err = bpf_link__detach(link); - if (CHECK(err, "link_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "link_detach")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) - goto cleanup; - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); + + ASSERT_OK(err, "link_info"); + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); /* ifindex should be zeroed out */ - CHECK(link_info.xdp.ifindex != 0, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, 0); + ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex"); cleanup: test_xdp_link__destroy(skel1); diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c new file mode 100644 index 000000000000..7e02b7361307 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +extern const int bpf_prog_active __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +SEC("fentry/security_inode_getattr") +int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + void *active; + u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* FAIL here! 'active' points to 'regular' memory. It + * cannot be submitted to ring buffer. + */ + bpf_ringbuf_submit(active, 0); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c index ee7d6ac0f615..64ff32eaae92 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_link.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp) { return 0; } + +SEC("tc") +int tc_handler(struct __sk_buff *skb) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c new file mode 100644 index 000000000000..b64d33e4833c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -0,0 +1,95 @@ +{ + "ringbuf: invalid reservation offset 1", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "dereference of modified alloc_mem ptr R1", +}, +{ + "ringbuf: invalid reservation offset 2", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "R7 min value is outside of the allowed memory range", +}, +{ + "ringbuf: check passing rb mem to helpers", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + /* pass allocated ring buffer memory to fib lookup */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup), + /* submit the ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 1a8eb9672bd1..8cfc5349d2a8 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", }, { "check corrupted spill/fill", diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 412d85205546..3f4c8cfe7aca 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4059,6 +4059,9 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test -v Be verbose + +Tests: + $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER EOF } diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index 694d70710ff0..dfc27cdc6c05 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=300 +timeout=1500