diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 6b5e31f470dc..e916c1eddf1b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -86,12 +86,28 @@ Description: The unit size is one block, now only support configuring in range of [1, 512]. +What: /sys/fs/f2fs//umount_discard_timeout +Date: January 2019 +Contact: "Jaegeuk Kim" +Description: + Set timeout to issue discard commands during umount. + Default: 5 secs + What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" Description: Controls the number of trials to find a victim segment. +What: /sys/fs/f2fs//migration_granularity +Date: October 2018 +Contact: "Chao Yu" +Description: + Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. + What: /sys/fs/f2fs//dir_level Date: March 2014 Contact: "Jaegeuk Kim" diff --git a/Documentation/Makefile b/Documentation/Makefile index fc759598c4c9..59d516b7afcb 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -1,4 +1,3 @@ subdir-y := accounting auxdisplay blackfin connector \ filesystems filesystems ia64 laptops misc-devices \ - networking pcmcia prctl ptp spi timers vDSO video4linux \ - watchdog + pcmcia prctl ptp spi timers vDSO video4linux watchdog diff --git a/Documentation/arm/kernel_mode_neon.txt b/Documentation/arm/kernel_mode_neon.txt index 525452726d31..b9e060c5b61e 100644 --- a/Documentation/arm/kernel_mode_neon.txt +++ b/Documentation/arm/kernel_mode_neon.txt @@ -6,7 +6,7 @@ TL;DR summary * Use only NEON instructions, or VFP instructions that don't rely on support code * Isolate your NEON code in a separate compilation unit, and compile it with - '-mfpu=neon -mfloat-abi=softfp' + '-march=armv7-a -mfpu=neon -mfloat-abi=softfp' * Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your NEON code * Don't sleep in your NEON code, and be aware that it will be executed with @@ -87,7 +87,7 @@ instructions appearing in unexpected places if no special care is taken. Therefore, the recommended and only supported way of using NEON/VFP in the kernel is by adhering to the following rules: * isolate the NEON code in a separate compilation unit and compile it with - '-mfpu=neon -mfloat-abi=softfp'; + '-march=armv7-a -mfpu=neon -mfloat-abi=softfp'; * issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls into the unit containing the NEON code from a compilation unit which is *not* built with the GCC flag '-mfpu=neon' set. diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 58b71ddf9b60..ba4b6acfc545 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -56,3 +56,4 @@ stable kernels. | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 348d80440433..564ccc6c7bad 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -126,6 +126,8 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs does not aware of cold files such as media files. inline_xattr Enable the inline xattrs feature. noinline_xattr Disable the inline xattrs feature. +inline_xattr_size=%u Support configuring inline xattr size, it depends on + flexible inline xattr feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. inline_dentry Enable the inline dir feature: data in new created diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 28091457b71a..771bb220449b 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -82,6 +82,29 @@ Only the lists of names from directories are merged. Other content such as metadata and extended attributes are reported for the upper directory only. These attributes of the lower directory are hidden. +credentials +----------- + +By default, all access to the upper, lower and work directories is the +recorded mounter's MAC and DAC credentials. The incoming accesses are +checked against the caller's credentials. + +In the case where caller MAC or DAC credentials do not overlap, a +use case available in older versions of the driver, the +override_creds mount flag can be turned off and help when the use +pattern has caller with legitimate credentials where the mounter +does not. Several unintended side effects will occur though. The +caller without certain key capabilities or lower privilege will not +always be able to delete files or directories, create nodes, or +search some restricted directories. The ability to search and read +a directory entry is spotty as a result of the cache mechanism not +retesting the credentials because of the assumption, a privileged +caller can fill cache, then a lower privilege can read the directory +cache. The uneven security model where cache, upperdir and workdir +are opened at privilege, but accessed without creating a form of +privilege escalation, should only be used with strict understanding +of the side effects and of the security policies. + whiteouts and opaque directories -------------------------------- diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d157847f769d..9b8aad3eb3b8 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -470,7 +470,9 @@ manner. The codes are the following: Note that there is no guarantee that every flag and associated mnemonic will be present in all further kernel releases. Things get changed, the flags may -be vanished or the reverse -- new added. +be vanished or the reverse -- new added. Interpretation of their meaning +might change in future as well. So each consumer of these flags has to +follow each specific kernel version for the exact semantic. The "Name" field will only be present on a mapping that has been named by userspace, and will show the name passed in by userspace. diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile deleted file mode 100644 index 4c5d7c485439..000000000000 --- a/Documentation/networking/Makefile +++ /dev/null @@ -1 +0,0 @@ -subdir-y := timestamping diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5f1ea84ed72b..c3e905163637 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -112,14 +112,11 @@ min_adv_mss - INTEGER IP Fragmentation: -ipfrag_high_thresh - INTEGER - Maximum memory used to reassemble IP fragments. When - ipfrag_high_thresh bytes of memory is allocated for this purpose, - the fragment handler will toss packets until ipfrag_low_thresh - is reached. This also serves as a maximum limit to namespaces - different from the initial one. +ipfrag_high_thresh - LONG INTEGER + Maximum memory used to reassemble IP fragments. -ipfrag_low_thresh - INTEGER +ipfrag_low_thresh - LONG INTEGER + (Obsolete since linux-4.4.174, backported from linux-4.17) Maximum memory used to reassemble IP fragments before the kernel begins to remove incomplete fragment queues to free up resources. The kernel still accepts new fragments for defragmentation. diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile deleted file mode 100644 index 8c20dfaa4d6e..000000000000 --- a/Documentation/networking/timestamping/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# To compile, from the source root -# -# make headers_install -# make M=documentation - -# List of programs to build -hostprogs-y := hwtstamp_config timestamping txtimestamp - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include -HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include -HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index df8ab4fc240a..496673adcb6b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -13,7 +13,7 @@ of a virtual machine. The ioctls belong to three classes - VM ioctls: These query and set attributes that affect an entire virtual machine, for example memory layout. In addition a VM ioctl is used to - create virtual cpus (vcpus). + create virtual cpus (vcpus) and devices. Only run VM ioctls from the same process (address space) that was used to create the VM. @@ -24,6 +24,11 @@ of a virtual machine. The ioctls belong to three classes Only run vcpu ioctls from the same thread that was used to create the vcpu. + - device ioctls: These query and set attributes that control the operation + of a single device. + + device ioctls must be issued from the same process (address space) that + was used to create the VM. 2. File descriptors ------------------- @@ -32,10 +37,11 @@ The kvm API is centered around file descriptors. An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM -ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu -and return a file descriptor pointing to it. Finally, ioctls on a vcpu -fd can be used to control the vcpu, including the important task of -actually running guest code. +ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will +create a virtual cpu or device and return a file descriptor pointing to +the new resource. Finally, ioctls on a vcpu or device fd can be used +to control the vcpu or device. For vcpus, this includes the important +task of actually running guest code. In general file descriptors can be migrated among processes by means of fork() and the SCM_RIGHTS facility of unix domain socket. These diff --git a/MAINTAINERS b/MAINTAINERS index d09e4c9cf8f2..9568c6aa622f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11484,6 +11484,19 @@ S: Maintained F: drivers/media/v4l2-core/videobuf2-* F: include/media/videobuf2-* +VIRTIO AND VHOST VSOCK DRIVER +M: Stefan Hajnoczi +L: kvm@vger.kernel.org +L: virtualization@lists.linux-foundation.org +L: netdev@vger.kernel.org +S: Maintained +F: include/linux/virtio_vsock.h +F: include/uapi/linux/virtio_vsock.h +F: net/vmw_vsock/virtio_transport_common.c +F: net/vmw_vsock/virtio_transport.c +F: drivers/vhost/vsock.c +F: drivers/vhost/vsock.h + VIRTUAL SERIO DEVICE DRIVER M: Stephen Chandler Paul S: Maintained diff --git a/Makefile b/Makefile index 9fc62e6bb88c..093eebd8efac 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 167 +SUBLEVEL = 179 EXTRAVERSION = NAME = Blurry Fish Butt @@ -639,7 +639,7 @@ CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y) $(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") endif -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif @@ -668,7 +668,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) +KBUILD_CFLAGS += -Os else ifdef CONFIG_PROFILE_ALL_BRANCHES KBUILD_CFLAGS += -O2 diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h index 06377400dc09..469642801a68 100644 --- a/arch/alpha/include/asm/irq.h +++ b/arch/alpha/include/asm/irq.h @@ -55,15 +55,15 @@ #elif defined(CONFIG_ALPHA_DP264) || \ defined(CONFIG_ALPHA_LYNX) || \ - defined(CONFIG_ALPHA_SHARK) || \ - defined(CONFIG_ALPHA_EIGER) + defined(CONFIG_ALPHA_SHARK) # define NR_IRQS 64 #elif defined(CONFIG_ALPHA_TITAN) #define NR_IRQS 80 #elif defined(CONFIG_ALPHA_RAWHIDE) || \ - defined(CONFIG_ALPHA_TAKARA) + defined(CONFIG_ALPHA_TAKARA) || \ + defined(CONFIG_ALPHA_EIGER) # define NR_IRQS 128 #elif defined(CONFIG_ALPHA_WILDFIRE) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 4a905bd667e2..0f68f0de9b5e 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -77,7 +77,7 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+8 : (r)-10]) + (r) <= 18 ? (r)+10 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 0352fb8d21b9..9623ae002f5b 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -286,7 +286,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long word) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) { if (!word) return word; @@ -346,9 +346,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long x) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) { - int n; + unsigned long n; asm volatile( " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index cb69299a492e..f120d823e8c2 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_ISA_ARCV2 #include @@ -85,6 +86,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) return w; } +/* + * {read,write}s{b,w,l}() repeatedly access the same IO address in + * native endianness in 8-, 16-, 32-bit chunks {into,from} memory, + * @count times + */ +#define __raw_readsx(t,f) \ +static inline void __raw_reads##f(const volatile void __iomem *addr, \ + void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + u##t x = __raw_read##f(addr); \ + *buf++ = x; \ + } while (--count); \ + } else { \ + do { \ + u##t x = __raw_read##f(addr); \ + put_unaligned(x, buf++); \ + } while (--count); \ + } \ +} + +#define __raw_readsb __raw_readsb +__raw_readsx(8, b) +#define __raw_readsw __raw_readsw +__raw_readsx(16, w) +#define __raw_readsl __raw_readsl +__raw_readsx(32, l) + #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { @@ -117,6 +154,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } +#define __raw_writesx(t,f) \ +static inline void __raw_writes##f(volatile void __iomem *addr, \ + const void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + const u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + __raw_write##f(*buf++, addr); \ + } while (--count); \ + } else { \ + do { \ + __raw_write##f(get_unaligned(buf++), addr); \ + } while (--count); \ + } \ +} + +#define __raw_writesb __raw_writesb +__raw_writesx(8, b) +#define __raw_writesw __raw_writesw +__raw_writesx(16, w) +#define __raw_writesl __raw_writesl +__raw_writesx(32, l) + /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case @@ -132,10 +198,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) /* * Relaxed API for drivers which can handle barrier ordering themselves diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 5f071762fb1c..6a2ae61748e4 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = { /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */ + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ #ifdef CONFIG_ISA_ARCV2 [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 57387b567f34..f077a419cb51 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -209,7 +209,7 @@ __arc_copy_from_user(void *to, const void __user *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -438,7 +438,7 @@ __arc_copy_to_user(void __user *to, const void *from, unsigned long n) */ "=&r" (tmp), "+r" (to), "+r" (from) : - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return n; } @@ -658,7 +658,7 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) " .previous \n" : "+r"(d_char), "+r"(res) : "i"(0) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } @@ -691,7 +691,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) : "g"(-EFAULT), "r"(count) - : "lp_count", "lp_start", "lp_end", "memory"); + : "lp_count", "memory"); return res; } diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 689dd867fdff..cd64cb4ef7b0 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -17,6 +17,7 @@ #include #include #include +#include .macro CPU_EARLY_SETUP @@ -47,6 +48,15 @@ sr r5, [ARC_REG_DC_CTRL] 1: + +#ifdef CONFIG_ISA_ARCV2 + ; Unaligned access is disabled at reset, so re-enable early as + ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access + ; by default + lr r5, [status32] + bset r5, r5, STATUS_AD_BIT + kflag r5 +#endif .endm .section .init.text, "ax",@progbits diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 71658f55d4e8..9ebef2effd62 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -33,10 +33,10 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 select HAVE_ARCH_MMAP_RND_BITS if MMU - select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_ARM_SMCCC if CPU_V7 @@ -1466,6 +1466,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP + select GENERIC_IRQ_MIGRATION help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index 6881757b03e8..67369f284b91 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -147,7 +147,7 @@ sound { compatible = "simple-audio-card"; - simple-audio-card,name = "DA850/OMAP-L138 EVM"; + simple-audio-card,name = "DA850-OMAPL138 EVM"; simple-audio-card,widgets = "Line", "Line In", "Line", "Line Out"; diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 2f30d632f1cc..e81a27214188 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -150,6 +150,9 @@ interrupt-controller; #interrupt-cells = <3>; interrupt-parent = <&gic>; + clock-names = "clkout8"; + clocks = <&cmu CLK_FIN_PLL>; + #clock-cells = <1>; }; mipi_phy: video-phy@10020710 { diff --git a/arch/arm/boot/dts/exynos5420-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos5420-tmu-sensor-conf.dtsi new file mode 100644 index 000000000000..c8771c660550 --- /dev/null +++ b/arch/arm/boot/dts/exynos5420-tmu-sensor-conf.dtsi @@ -0,0 +1,25 @@ +/* + * Device tree sources for Exynos5420 TMU sensor configuration + * + * Copyright (c) 2014 Lukasz Majewski + * Copyright (c) 2017 Krzysztof Kozlowski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include + +#thermal-sensor-cells = <0>; +samsung,tmu_gain = <8>; +samsung,tmu_reference_voltage = <16>; +samsung,tmu_noise_cancel_mode = <4>; +samsung,tmu_efuse_value = <55>; +samsung,tmu_min_efuse_value = <0>; +samsung,tmu_max_efuse_value = <100>; +samsung,tmu_first_point_trim = <25>; +samsung,tmu_second_point_trim = <85>; +samsung,tmu_default_temp_offset = <50>; +samsung,tmu_cal_type = ; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index 1b3d6c769a3c..d5edb7766942 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -777,7 +777,7 @@ interrupts = <0 65 0>; clocks = <&clock CLK_TMU>; clock-names = "tmu_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #include "exynos5420-tmu-sensor-conf.dtsi" }; tmu_cpu1: tmu@10064000 { @@ -786,7 +786,7 @@ interrupts = <0 183 0>; clocks = <&clock CLK_TMU>; clock-names = "tmu_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #include "exynos5420-tmu-sensor-conf.dtsi" }; tmu_cpu2: tmu@10068000 { @@ -795,7 +795,7 @@ interrupts = <0 184 0>; clocks = <&clock CLK_TMU>, <&clock CLK_TMU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #include "exynos5420-tmu-sensor-conf.dtsi" }; tmu_cpu3: tmu@1006c000 { @@ -804,7 +804,7 @@ interrupts = <0 185 0>; clocks = <&clock CLK_TMU>, <&clock CLK_TMU_GPU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #include "exynos5420-tmu-sensor-conf.dtsi" }; tmu_gpu: tmu@100a0000 { @@ -813,7 +813,7 @@ interrupts = <0 215 0>; clocks = <&clock CLK_TMU_GPU>, <&clock CLK_TMU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; - #include "exynos4412-tmu-sensor-conf.dtsi" + #include "exynos5420-tmu-sensor-conf.dtsi" }; thermal-zones { diff --git a/arch/arm/boot/dts/kirkwood-dnskw.dtsi b/arch/arm/boot/dts/kirkwood-dnskw.dtsi index 113dcf056dcf..1b2dacfa6132 100644 --- a/arch/arm/boot/dts/kirkwood-dnskw.dtsi +++ b/arch/arm/boot/dts/kirkwood-dnskw.dtsi @@ -35,8 +35,8 @@ compatible = "gpio-fan"; pinctrl-0 = <&pmx_fan_high_speed &pmx_fan_low_speed>; pinctrl-names = "default"; - gpios = <&gpio1 14 GPIO_ACTIVE_LOW - &gpio1 13 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 14 GPIO_ACTIVE_HIGH + &gpio1 13 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = <0 0 3000 1 6000 2>; diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi index 766bbb8495b6..47e5b63339d1 100644 --- a/arch/arm/boot/dts/mmp2.dtsi +++ b/arch/arm/boot/dts/mmp2.dtsi @@ -220,12 +220,15 @@ status = "disabled"; }; - twsi2: i2c@d4025000 { + twsi2: i2c@d4031000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4025000 0x1000>; - interrupts = <58>; + reg = <0xd4031000 0x1000>; + interrupt-parent = <&intcmux17>; + interrupts = <0>; clocks = <&soc_clocks MMP2_CLK_TWSI1>; resets = <&soc_clocks MMP2_CLK_TWSI1>; + #address-cells = <1>; + #size-cells = <0>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index f0bdc41f8eff..235d1493f8aa 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -33,6 +33,7 @@ gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>; /* gpio line 48 */ enable-active-high; regulator-boot-on; + startup-delay-us = <25000>; }; vbat: fixedregulator-vbat { diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 8a394f336003..ee65702f9645 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -517,7 +517,7 @@ #define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0) #define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3) #define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1) -#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1) +#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1) #define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2) #define PIN_PC10 74 #define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0) diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig index d5a16dfe678f..c0da14fa7366 100644 --- a/arch/arm/configs/ranchu_defconfig +++ b/arch/arm/configs/ranchu_defconfig @@ -186,7 +186,6 @@ CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_MISC=y -CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO_SERPORT is not set diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index fac0533ea633..f64e8413ab9a 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -205,10 +205,11 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +.Lsha256_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order #else - adr r3,sha256_block_data_order + adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 555a1a8eec90..72c248081d27 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -86,10 +86,11 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +.Lsha256_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order #else - adr r3,sha256_block_data_order + adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index a2b11a844357..5fe336420bcf 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -267,10 +267,11 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order #else - adr r3,sha512_block_data_order + adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 3694c4d4ca2b..de9bd7f55242 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -134,10 +134,11 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order #else - adr r3,sha512_block_data_order + adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 85eb7ef44a18..744d70e1d202 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -10,6 +10,8 @@ #define sev() __asm__ __volatile__ ("sev" : : : "memory") #define wfe() __asm__ __volatile__ ("wfe" : : : "memory") #define wfi() __asm__ __volatile__ ("wfi" : : : "memory") +#else +#define wfe() do { } while (0) #endif #if __LINUX_ARM_ARCH__ >= 7 diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 1bd9510de1b9..cae4df39f02e 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -24,7 +24,6 @@ #ifndef __ASSEMBLY__ struct irqaction; struct pt_regs; -extern void migrate_irqs(void); extern void asm_do_IRQ(unsigned int, struct pt_regs *); void handle_IRQ(unsigned int, struct pt_regs *); diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 8a1e8e995dae..08509183c7df 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -77,7 +77,11 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327) -#define cpu_relax() smp_mb() +#define cpu_relax() \ + do { \ + smp_mb(); \ + __asm__ __volatile__("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;"); \ + } while (0) #else #define cpu_relax() barrier() #endif diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 1d45320ee125..900c591913d5 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -119,64 +118,3 @@ int __init arch_probe_nr_irqs(void) return nr_irqs; } #endif - -#ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = irq_data_get_affinity_mask(d); - struct irq_chip *c; - bool ret = false; - - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; - ret = true; - } - - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(irq_data_get_affinity_mask(d), affinity); - - return ret; -} - -/* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any - * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. - */ -void migrate_irqs(void) -{ - unsigned int i; - struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); - - for_each_irq_desc(i, desc) { - bool affinity_broken; - - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - i, smp_processor_id()); - } - - local_irq_restore(flags); -} -#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 8bf3b7c09888..46519916a465 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -87,8 +87,11 @@ void machine_crash_nonpanic_core(void *unused) set_cpu_online(smp_processor_id(), false); atomic_dec(&waiting_for_crash_ipi); - while (1) + + while (1) { cpu_relax(); + wfe(); + } } static void machine_kexec_mask_interrupts(void) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index 69bda1a5707e..1f665acaa6a9 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -15,7 +15,7 @@ struct patch { unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index e06307ad1d1c..fd739fe04834 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -93,77 +93,6 @@ void arch_cpu_idle_exit(void) idle_notifier_call_chain(IDLE_END); } -/* - * dump a block of kernel memory from around the given address - */ -static void show_data(unsigned long addr, int nbytes, const char *name) -{ - int i, j; - int nlines; - u32 *p; - - /* - * don't attempt to dump non-kernel addresses or - * values that are probably just small negative numbers - */ - if (addr < PAGE_OFFSET || addr > -256UL) - return; - - printk("\n%s: %#lx:\n", name, addr); - - /* - * round address down to a 32 bit boundary - * and always dump a multiple of 32 bytes - */ - p = (u32 *)(addr & ~(sizeof(u32) - 1)); - nbytes += (addr & (sizeof(u32) - 1)); - nlines = (nbytes + 31) / 32; - - - for (i = 0; i < nlines; i++) { - /* - * just display low 16 bits of address to keep - * each line of the dump < 80 characters - */ - printk("%04lx ", (unsigned long)p & 0xffff); - for (j = 0; j < 8; j++) { - u32 data; - if (probe_kernel_address(p, data)) { - printk(" ********"); - } else { - printk(" %08x", data); - } - ++p; - } - printk("\n"); - } -} - -static void show_extra_register_data(struct pt_regs *regs, int nbytes) -{ - mm_segment_t fs; - - fs = get_fs(); - set_fs(KERNEL_DS); - show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC"); - show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR"); - show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP"); - show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP"); - show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP"); - show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0"); - show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1"); - show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2"); - show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3"); - show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4"); - show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5"); - show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6"); - show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7"); - show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8"); - show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9"); - show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10"); - set_fs(fs); -} - void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -251,8 +180,6 @@ void __show_regs(struct pt_regs *regs) printk("Control: %08x%s\n", ctrl, buf); } #endif - - show_extra_register_data(regs, 128); } void show_regs(struct pt_regs * regs) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 37312f6749f3..2f9587f1d863 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -222,7 +222,7 @@ int __cpu_disable(void) /* * OK - migrate IRQs away from this CPU */ - migrate_irqs(); + irq_migrate_all_off_this_cpu(); /* * Flush user cache and TLB mappings, and then remove this CPU @@ -566,8 +566,10 @@ static void ipi_cpu_stop(unsigned int cpu) local_fiq_disable(); local_irq_disable(); - while (1) + while (1) { cpu_relax(); + wfe(); + } } static DEFINE_PER_CPU(struct completion *, cpu_completion); @@ -684,6 +686,21 @@ void smp_send_stop(void) pr_warn("SMP: failed to stop secondary CPUs\n"); } +/* In case panic() and panic() called at the same time on CPU1 and CPU2, + * and CPU 1 calls panic_smp_self_stop() before crash_smp_send_stop() + * CPU1 can't receive the ipi irqs from CPU2, CPU1 will be always online, + * kdump fails. So split out the panic_smp_self_stop() and add + * set_cpu_online(smp_processor_id(), false). + */ +void panic_smp_self_stop(void) +{ + pr_debug("CPU %u will stop doing anything useful since another CPU has paniced\n", + smp_processor_id()); + set_cpu_online(smp_processor_id(), false); + while (1) + cpu_relax(); +} + /* * not supported here */ diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 0bee233fef9a..314cfb232a63 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_SPINLOCK(unwind_lock); +static DEFINE_RAW_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) /* module unwind tables */ struct unwind_table *table; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) break; } } - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) if (!tab) return; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 387ee2a11e36..ae61e2ea7255 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -87,11 +87,10 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len) /** * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation + * or in-kernel IO emulation + * * @vcpu: The VCPU pointer * @run: The VCPU run struct containing the mmio data - * - * This should only be called after returning from userspace for MMIO load - * emulation. */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -118,6 +117,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 0; } @@ -151,11 +156,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; - /* - * The MMIO instruction is emulated and should not be re-executed - * in the guest. - */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 0; } @@ -206,14 +206,17 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; - memcpy(run->mmio.data, data_buf, len); if (!ret) { /* We handled the access successfully in the kernel. */ + if (!is_write) + memcpy(run->mmio.data, data_buf, len); kvm_handle_mmio_return(vcpu, run); return 1; } + if (is_write) + memcpy(run->mmio.data, data_buf, len); run->exit_reason = KVM_EXIT_MMIO; return 0; } diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index d8a780799506..06348a3d50c2 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -35,7 +35,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) - NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon + NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon CFLAGS_xor-neon.o += $(NEON_FLAGS) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index 2c40aeab3eaa..c691b901092f 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -14,7 +14,7 @@ MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ -#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' +#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' #endif /* diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 318394ed5c7a..5e11ad3164e0 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -83,7 +83,7 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus, } else /* remote PCI bus */ base = cnspci->cfg1_regs + ((busno & 0xf) << 20); - return base + (where & 0xffc) + (devfn << 12); + return base + where + (devfn << 12); } static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 353bb8774112..ec74c2812c1a 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -14,30 +14,23 @@ #include "cpuidle.h" #include "hardware.h" -static atomic_t master = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(master_lock); +static int num_idle_cpus = 0; +static DEFINE_SPINLOCK(cpuidle_lock); static int imx6q_enter_wait(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - if (atomic_inc_return(&master) == num_online_cpus()) { - /* - * With this lock, we prevent other cpu to exit and enter - * this function again and become the master. - */ - if (!spin_trylock(&master_lock)) - goto idle; + spin_lock(&cpuidle_lock); + if (++num_idle_cpus == num_online_cpus()) imx6_set_lpm(WAIT_UNCLOCKED); - cpu_do_idle(); - imx6_set_lpm(WAIT_CLOCKED); - spin_unlock(&master_lock); - goto done; - } + spin_unlock(&cpuidle_lock); -idle: cpu_do_idle(); -done: - atomic_dec(&master); + + spin_lock(&cpuidle_lock); + if (num_idle_cpus-- == num_online_cpus()) + imx6_set_lpm(WAIT_CLOCKED); + spin_unlock(&cpuidle_lock); return index; } diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index 3c6672b3796b..7f5df8992008 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -97,7 +97,7 @@ int __init imx6sx_cpuidle_init(void) * except for power up sw2iso which need to be * larger than LDO ramp up time. */ - imx_gpc_set_arm_power_up_timing(2, 1); + imx_gpc_set_arm_power_up_timing(0xf, 1); imx_gpc_set_arm_power_down_timing(1, 1); return cpuidle_register(&imx6sx_cpuidle_driver, NULL); diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index 38b0da300dd5..423a88ff908c 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -394,7 +394,11 @@ static int __init_refok impd1_probe(struct lm_device *dev) sizeof(*lookup) + 3 * sizeof(struct gpiod_lookup), GFP_KERNEL); chipname = devm_kstrdup(&dev->dev, devname, GFP_KERNEL); - mmciname = kasprintf(GFP_KERNEL, "lm%x:00700", dev->id); + mmciname = devm_kasprintf(&dev->dev, GFP_KERNEL, + "lm%x:00700", dev->id); + if (!lookup || !chipname || !mmciname) + return -ENOMEM; + lookup->dev_id = mmciname; /* * Offsets on GPIO block 1: diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index c1cd80ecc219..a904244264ce 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -75,8 +75,7 @@ void __init n2100_map_io(void) /* * N2100 PCI. */ -static int __init -n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index a95499ea8706..fa1d41edce68 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -511,6 +511,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) { struct modem_private_data *priv = port->private_data; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 6ab13d18c636..cde86d1199cf 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -115,6 +115,7 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) u32 enable_mask, enable_shift; u32 pipd_mask, pipd_shift; u32 reg; + int ret; if (dsi_id == 0) { enable_mask = OMAP4_DSI1_LANEENABLE_MASK; @@ -130,7 +131,11 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) return -ENODEV; } - regmap_read(omap4_dsi_mux_syscon, OMAP4_DSIPHY_SYSCON_OFFSET, ®); + ret = regmap_read(omap4_dsi_mux_syscon, + OMAP4_DSIPHY_SYSCON_OFFSET, + ®); + if (ret) + return ret; reg &= ~enable_mask; reg &= ~pipd_mask; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 147c90e70b2e..36706d32d656 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2526,7 +2526,7 @@ static int __init _init(struct omap_hwmod *oh, void *data) * a stub; implementing this properly requires iclk autoidle usecounting in * the clock code. No return value. */ -static void __init _setup_iclk_autoidle(struct omap_hwmod *oh) +static void _setup_iclk_autoidle(struct omap_hwmod *oh) { struct omap_hwmod_ocp_if *os; struct list_head *p; @@ -2561,7 +2561,7 @@ static void __init _setup_iclk_autoidle(struct omap_hwmod *oh) * reset. Returns 0 upon success or a negative error code upon * failure. */ -static int __init _setup_reset(struct omap_hwmod *oh) +static int _setup_reset(struct omap_hwmod *oh) { int r; @@ -2622,7 +2622,7 @@ static int __init _setup_reset(struct omap_hwmod *oh) * * No return value. */ -static void __init _setup_postsetup(struct omap_hwmod *oh) +static void _setup_postsetup(struct omap_hwmod *oh) { u8 postsetup_state; diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 30768003f854..8c505284bc0c 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -344,7 +344,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 0ce4548ef7f0..4b9e9d1d8229 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -533,8 +533,10 @@ void omap_prm_reset_system(void) prm_ll_data->reset_system(); - while (1) + while (1) { cpu_relax(); + wfe(); + } } /** diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index a7dae60810e8..307fc18edede 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -547,7 +547,7 @@ static struct pxa3xx_u2d_platform_data cm_x300_u2d_platform_data = { .exit = cm_x300_u2d_exit, }; -static void cm_x300_init_u2d(void) +static void __init cm_x300_init_u2d(void) { pxa3xx_set_u2d_info(&cm_x300_u2d_platform_data); } diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 5d665588c7eb..05aa7071efd6 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -183,7 +183,7 @@ static struct pxafb_mach_info littleton_lcd_info = { .lcd_conn = LCD_COLOR_TFT_16BPP, }; -static void littleton_init_lcd(void) +static void __init littleton_init_lcd(void) { pxa_set_fb_info(NULL, &littleton_lcd_info); } diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index d757cfb5f8a6..4da2458d7f32 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -558,7 +558,7 @@ static struct pxaohci_platform_data zeus_ohci_platform_data = { .flags = ENABLE_PORT_ALL | POWER_SENSE_LOW, }; -static void zeus_register_ohci(void) +static void __init zeus_register_ohci(void) { /* Port 2 is shared between host and client interface. */ UP2OCR = UP2OCR_HXOE | UP2OCR_HXS | UP2OCR_DMPDE | UP2OCR_DPPDE; diff --git a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c index ce2db235dbaf..5e8a306163de 100644 --- a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c +++ b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c @@ -70,16 +70,16 @@ static int osiris_dvs_notify(struct notifier_block *nb, switch (val) { case CPUFREQ_PRECHANGE: - if (old_dvs & !new_dvs || - cur_dvs & !new_dvs) { + if ((old_dvs && !new_dvs) || + (cur_dvs && !new_dvs)) { pr_debug("%s: exiting dvs\n", __func__); cur_dvs = false; gpio_set_value(OSIRIS_GPIO_DVS, 1); } break; case CPUFREQ_POSTCHANGE: - if (!old_dvs & new_dvs || - !cur_dvs & new_dvs) { + if ((!old_dvs && new_dvs) || + (!cur_dvs && new_dvs)) { pr_debug("entering dvs\n"); cur_dvs = true; gpio_set_value(OSIRIS_GPIO_DVS, 0); diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index a134d8a13d00..11d699af30ed 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -359,14 +359,16 @@ v7_dma_inv_range: ALT_UP(W(nop)) #endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + addne r0, r0, r2 tst r1, r3 bic r1, r1, r3 mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line -1: - mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line - add r0, r0, r2 cmp r0, r1 +1: + mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index daa1a65f2eb7..6748827c2ec8 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -238,8 +238,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV; - iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -249,7 +247,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock); - kfree(ssp); return 0; } diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 57729b915003..b9396dcf836d 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -255,7 +255,7 @@ config S3C_PM_DEBUG_LED_SMDK config SAMSUNG_PM_CHECK bool "S3C2410 PM Suspend Memory CRC" - depends on PM + depends on PM && (PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210) select CRC32 help Enable the PM code's memory area checksum over sleep. This option diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 986e1d3c96f6..bce55b3a4ce1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -98,12 +98,11 @@ config ARM64 select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY - select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING - select HAVE_ARM_SMCCC select THREAD_INFO_IN_TASK + select HAVE_ARM_SMCCC help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4832fd77f178..47b84864220c 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -39,8 +39,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only endif KBUILD_CFLAGS += $(lseinstr) KBUILD_CFLAGS += -fno-pic -KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c620078b587c..c4ce7cc2d4e5 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -54,8 +54,6 @@ CONFIG_CP15_BARRIER_EMULATION=y CONFIG_SETEND_EMULATION=y CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_RANDOMIZE_BASE=y -CONFIG_CMDLINE="console=ttyAMA0" -CONFIG_CMDLINE_EXTEND=y # CONFIG_EFI is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -179,6 +177,8 @@ CONFIG_NET_CLS_U32=y CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_U32=y CONFIG_NET_CLS_ACT=y +CONFIG_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y CONFIG_CFG80211=y # CONFIG_CFG80211_DEFAULT_PS is not set CONFIG_MAC80211=y @@ -258,6 +258,7 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y @@ -276,6 +277,12 @@ CONFIG_DRM=y CONFIG_DRM_VIRTIO_GPU=y CONFIG_SOUND=y CONFIG_SND=y +CONFIG_SND_HRTIMER=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_VERBOSE_PROCFS is not set +# CONFIG_SND_DRIVERS is not set +CONFIG_SND_INTEL8X0=y +# CONFIG_SND_USB is not set CONFIG_HIDRAW=y CONFIG_UHID=y CONFIG_HID_A4TECH=y @@ -347,12 +354,13 @@ CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_MMC=y # CONFIG_MMC_BLOCK is not set CONFIG_RTC_CLASS=y -# CONFIG_RTC_HCTOSYS is not set # CONFIG_RTC_SYSTOHC is not set +CONFIG_RTC_DRV_PL030=y CONFIG_RTC_DRV_PL031=y CONFIG_VIRTIO_PCI=y # CONFIG_VIRTIO_PCI_LEGACY is not set CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_STAGING=y @@ -376,6 +384,7 @@ CONFIG_F2FS_FS_ENCRYPTION=y CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y +CONFIG_OVERLAY_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig index 7f847fc40d14..ceebab60da2b 100644 --- a/arch/arm64/configs/ranchu64_defconfig +++ b/arch/arm64/configs/ranchu64_defconfig @@ -214,7 +214,6 @@ CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_MISC=y -CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO_SERPORT is not set diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 3363560c79b7..7bc459d9235c 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -74,12 +74,13 @@ ENTRY(ce_aes_ccm_auth_data) beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: mov w7, w8 +8: cbz w8, 91f + mov w7, w8 add w8, w8, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b - eor v0.16b, v0.16b, v1.16b +91: eor v0.16b, v0.16b, v1.16b st1 {v0.16b}, [x0] 10: str w8, [x3] ret diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 87b9d9afc5fc..7ceae35e1ba8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -33,11 +33,11 @@ #define ARM64_HAS_NO_HW_PREFETCH 8 #define ARM64_HAS_UAO 9 #define ARM64_ALT_PAN_NOT_UAO 10 - -#define ARM64_WORKAROUND_CAVIUM_27456 11 -#define ARM64_HAS_VIRT_HOST_EXTN 12 +#define ARM64_HAS_VIRT_HOST_EXTN 11 +#define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_UNMAP_KERNEL_AT_EL0 23 + #define ARM64_NCAPS 24 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 8042c98ec040..0a23fb0600c8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -59,7 +59,7 @@ MIDR_ARCHITECTURE_MASK | \ MIDR_PARTNUM_MASK) -#define MIDR_CPU_PART(imp, partnum) \ +#define MIDR_CPU_MODEL(imp, partnum) \ (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) @@ -89,10 +89,10 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 -#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -#define MIDR_CORTEX_A55 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) -#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5bb2fd4674e7..06820092775f 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -56,23 +56,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6a889e943f4e..5385adcd157d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,8 @@ #include /* Hyp Configuration Register (HCR) bits */ +#define HCR_API (UL(1) << 41) +#define HCR_APK (UL(1) << 40) #define HCR_ID (UL(1) << 33) #define HCR_CD (UL(1) << 32) #define HCR_RW_SHIFT 31 @@ -81,6 +83,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 1d870666b8d9..8f91985b8eec 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -86,6 +86,16 @@ #define KASAN_SHADOW_SIZE (0) #endif +/* + * The size of the KASAN shadow region. This should be 1/8th of the + * size of the entire kernel virtual address space. + */ +#ifdef CONFIG_KASAN +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#else +#define KASAN_SHADOW_SIZE (0) +#endif + /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 01c0b3881f88..e80720940bdf 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -41,9 +41,9 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o -arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ cpu-reset.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 0f03a8fe2314..d18d15810d19 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -78,7 +78,6 @@ .macro mcount_get_lr reg ldr \reg, [x29] ldr \reg, [\reg, #8] - mcount_adjust_addr \reg, \reg .endm .macro mcount_get_lr_addr reg diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3e99814b6463..e6b4741b0d84 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -504,7 +505,7 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 ret /* Hyp configuration. */ -2: mov x0, #(1 << 31) // 64-bit EL1 +2: mov_q x0, HCR_HOST_NVHE_FLAGS msr hcr_el2, x0 /* Generic timers. */ @@ -517,8 +518,7 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #24, #4 - cmp x0, #1 - b.ne 3f + cbz x0, 3f mrs_s x0, ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index d3b5f75e652e..fcb486d09555 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -28,6 +28,8 @@ #include .text + .pushsection .hyp.text, "ax" + .align 11 ENTRY(__hyp_stub_vectors) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 814d0c51b2f9..c98ca98ec569 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -216,24 +216,33 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7ab14c4d5df..1d3851278f3f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -828,6 +828,7 @@ static struct platform_driver armv8_pmu_driver = { .driver = { .name = "armv8-pmu", .of_match_table = armv8_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv8_pmu_device_probe, }; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 61f8c5b66567..3c86fb75b167 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -153,11 +153,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - if (!tsk) - tsk = current; - - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - if (!tsk) tsk = current; @@ -173,6 +168,11 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) else irq_stack_ptr = 0; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -285,10 +285,12 @@ static DEFINE_RAW_SPINLOCK(die_lock); void die(const char *str, struct pt_regs *regs, int err) { int ret; + unsigned long flags; + + raw_spin_lock_irqsave(&die_lock, flags); oops_enter(); - raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); ret = __die(str, err, regs); @@ -298,13 +300,15 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - raw_spin_unlock_irq(&die_lock); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); + + raw_spin_unlock_irqrestore(&die_lock, flags); + if (ret != NOTIFY_STOP) do_exit(SIGSEGV); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6e8ff5366933..342716339300 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -328,7 +328,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (addr < USER_DS && is_permission_fault(esr, regs)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (is_el1_instruction_abort(esr)) @@ -642,26 +643,40 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } -asmlinkage int __exception do_debug_exception(unsigned long addr, +asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + unsigned long pc = instruction_pointer(regs); struct siginfo info; + int rv; - if (!inf->fn(addr, esr, regs)) - return 1; + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", - inf->name, esr, addr); + if (!inf->fn(addr_if_watchpoint, esr, regs)) { + rv = 1; + } else { + pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, pc); - info.si_signo = inf->sig; - info.si_errno = 0; - info.si_code = inf->code; - info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, 0); + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)pc; + arm64_notify_die("", regs, &info, 0); + rv = 0; + } - return 0; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + + return rv; } NOKPROBE_SYMBOL(do_debug_exception); diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 877da1908234..98e2a5dbcfda 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2724,7 +2724,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig (unsigned long int)(oper.indata + prev_ix), noinpages, 0, /* read access only for in data */ - 0, /* no force */ inpages, NULL); @@ -2740,8 +2739,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig current->mm, (unsigned long int)oper.cipher_outdata, nooutpages, - 1, /* write access for out data */ - 0, /* no force */ + FOLL_WRITE, /* write access for out data */ outpages, NULL); up_read(¤t->mm->mmap_sem); diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index e1c02ca230cb..073bba6f9f60 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -23,7 +23,7 @@ KBUILD_AFLAGS += $(aflags-y) LDFLAGS += $(ldflags-y) ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := h8300-unknown-linux- +CROSS_COMPILE := $(call cc-cross-prefix, h8300-unknown-linux- h8300-linux-) endif core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/ diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 0c161ed6d18e..8205b456de7a 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -143,7 +143,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, int ret; ret = get_user_pages(current, current->mm, virt_addr, - 1, VM_READ, 0, NULL, NULL); + 1, FOLL_WRITE, NULL, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 0b29dcfef69f..0c736ed58abd 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -59,7 +59,10 @@ cpuflags-$(CONFIG_M5206e) := $(call cc-option,-mcpu=5206e,-m5200) cpuflags-$(CONFIG_M5206) := $(call cc-option,-mcpu=5206,-m5200) KBUILD_AFLAGS += $(cpuflags-y) -KBUILD_CFLAGS += $(cpuflags-y) -pipe +KBUILD_CFLAGS += $(cpuflags-y) + +KBUILD_CFLAGS += -pipe -ffreestanding + ifdef CONFIG_MMU # without -fno-strength-reduce the 53c7xx.c driver fails ;-( KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2 diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2c93e3b7d318..62c54336ab4f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -813,6 +813,7 @@ config SIBYTE_SWARM select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_LITTLESUR bool "Sibyte BCM91250C2-LittleSur" @@ -835,6 +836,7 @@ config SIBYTE_SENTOSA select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_BIGSUR bool "Sibyte BCM91480B-BigSur" @@ -848,6 +850,7 @@ config SIBYTE_BIGSUR select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT + select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SNI_RM bool "SNI RM200/300/400" @@ -3031,6 +3034,7 @@ config MIPS32_O32 config MIPS32_N32 bool "Kernel support for n32 binaries" depends on 64BIT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select COMPAT select MIPS32_COMPAT select SYSVIPC_COMPAT if SYSVIPC diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 37fe58c19a90..542c3ede9722 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -13,6 +13,7 @@ #include #include #include +#include "../../../../include/linux/sizes.h" int main(int argc, char *argv[]) { @@ -45,11 +46,11 @@ int main(int argc, char *argv[]) vmlinuz_load_addr = vmlinux_load_addr + vmlinux_size; /* - * Align with 16 bytes: "greater than that used for any standard data - * types by a MIPS compiler." -- See MIPS Run Linux (Second Edition). + * Align with 64KB: KEXEC needs load sections to be aligned to PAGE_SIZE, + * which may be as large as 64KB depending on the kernel configuration. */ - vmlinuz_load_addr += (16 - vmlinux_size % 16); + vmlinuz_load_addr += (SZ_64K - vmlinux_size % SZ_64K); printf("0x%llx\n", vmlinuz_load_addr); diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 134879c1310a..4ed369c0ec6a 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -74,6 +74,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AR933X=y CONFIG_SERIAL_AR933X_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e77672539e8e..e4456e450f94 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -21,15 +21,15 @@ #endif #ifdef CONFIG_CPU_MICROMIPS -#define NOP_INSN "nop32" +#define B_INSN "b32" #else -#define NOP_INSN "nop" +#define B_INSN "b" #endif static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" NOP_INSN "\n\t" - "nop\n\t" + asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + "2:\tnop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index cf661a2fb141..16fade4f49dd 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -189,6 +189,11 @@ static inline int pmd_bad(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) + return pmd_val(pmd) & _PAGE_PRESENT; +#endif + return pmd_val(pmd) != (unsigned long) invalid_pte_table; } diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 65c2f97a4535..dc0baeb052a7 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -291,8 +291,8 @@ enum mm_32a_minor_op { mm_ext_op = 0x02c, mm_pool32axf_op = 0x03c, mm_srl32_op = 0x040, + mm_srlv32_op = 0x050, mm_sra_op = 0x080, - mm_srlv32_op = 0x090, mm_rotr_op = 0x0c0, mm_lwxs_op = 0x118, mm_addu32_op = 0x150, diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index db6f5afff4ff..ea897912bc71 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -71,14 +71,15 @@ static int __init vdma_init(void) get_order(VDMA_PGTBL_SIZE)); BUG_ON(!pgtbl); dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE); - pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl); + pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl); /* * Clear the R4030 translation table */ vdma_pgtbl_init(); - r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl)); + r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, + CPHYSADDR((unsigned long)pgtbl)); r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE); r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index dc1180a8bfa1..66736397af9f 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -52,6 +52,7 @@ asmlinkage void spurious_interrupt(void) void __init init_IRQ(void) { int i; + unsigned int order = get_order(IRQ_STACK_SIZE); for (i = 0; i < NR_IRQS; i++) irq_set_noprobe(i); @@ -59,8 +60,7 @@ void __init init_IRQ(void) arch_init_irq(); for_each_possible_cpu(i) { - int irq_pages = IRQ_STACK_SIZE / PAGE_SIZE; - void *s = (void *)__get_free_pages(GFP_KERNEL, irq_pages); + void *s = (void *)__get_free_pages(GFP_KERNEL, order); irq_stack[i] = s; pr_debug("CPU%d IRQ stack at 0x%p - 0x%p\n", i, diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 36bd476d0760..1385dabc1891 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -450,5 +450,5 @@ void mips_cm_error_report(void) } /* reprime cause register */ - write_gcr_error_cause(0); + write_gcr_error_cause(cm_error); } diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 33984e1b5583..ee61876bf3b5 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -343,7 +343,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -355,10 +355,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson64/lemote-2f/irq.c index cab5f43e0e29..d371f0294cbb 100644 --- a/arch/mips/loongson64/lemote-2f/irq.c +++ b/arch/mips/loongson64/lemote-2f/irq.c @@ -102,7 +102,7 @@ static struct irqaction ip6_irqaction = { static struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", - .flags = IRQF_NO_THREAD, + .flags = IRQF_NO_THREAD | IRQF_NO_SUSPEND, }; void __init mach_init_irq(void) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 349995d19c7f..e596e0a1cecc 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -303,7 +303,7 @@ slow_irqon: ret = get_user_pages_unlocked(current, mm, start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c index 2a5bb849b10e..288b58b00dc8 100644 --- a/arch/mips/pci/msi-octeon.c +++ b/arch/mips/pci/msi-octeon.c @@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void) int irq; struct irq_chip *msi; - if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { + if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) { + return 0; + } else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0; msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1; msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2; diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index c258cd406fbb..b36bbda31058 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -571,6 +571,11 @@ static int __init octeon_pci_setup(void) if (octeon_has_feature(OCTEON_FEATURE_PCIE)) return 0; + if (!octeon_is_pci_host()) { + pr_notice("Not in host mode, PCI Controller not initialized\n"); + return 0; + } + /* Point pcibios_map_irq() to the PCI version of it */ octeon_pcibios_map_irq = octeon_pci_pcibios_map_irq; @@ -582,11 +587,6 @@ static int __init octeon_pci_setup(void) else octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG; - if (!octeon_is_pci_host()) { - pr_notice("Not in host mode, PCI Controller not initialized\n"); - return 0; - } - /* PCI I/O and PCI MEM values */ set_io_port_base(OCTEON_PCI_IOSPACE_BASE); ioport_resource.start = 0; diff --git a/arch/mips/sibyte/common/Makefile b/arch/mips/sibyte/common/Makefile index b3d6bf23a662..3ef3fb658136 100644 --- a/arch/mips/sibyte/common/Makefile +++ b/arch/mips/sibyte/common/Makefile @@ -1,4 +1,5 @@ obj-y := cfe.o +obj-$(CONFIG_SWIOTLB) += dma.o obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o obj-$(CONFIG_SIBYTE_CFE_CONSOLE) += cfe_console.o obj-$(CONFIG_SIBYTE_TBPROF) += sb_tbprof.o diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c new file mode 100644 index 000000000000..eb47a94f3583 --- /dev/null +++ b/arch/mips/sibyte/common/dma.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DMA support for Broadcom SiByte platforms. + * + * Copyright (c) 2018 Maciej W. Rozycki + */ + +#include +#include + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); +} diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 6c7d78546eee..886005b1e87d 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -107,7 +107,7 @@ $(obj)/%-o32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := -mabi=32 +$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=32 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) @@ -143,7 +143,7 @@ $(obj)/%-n32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := -mabi=n32 +$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=n32 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 99e4487248ff..57003d1bd243 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,7 +70,8 @@ $(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \ diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 5c2199857aa8..a3550e8f1a77 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -15,7 +15,7 @@ RELA = 7 RELACOUNT = 0x6ffffff9 - .text + .data /* A procedure descriptor used when booting this as a COFF file. * When making COFF, this comes first in the link and we're * linked at 0x500000. @@ -23,6 +23,8 @@ RELACOUNT = 0x6ffffff9 .globl _zimage_start_opd _zimage_start_opd: .long 0x500000, 0, 0, 0 + .text + b _zimage_start #ifdef __powerpc64__ .balign 8 diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 334459ad145b..90863245df53 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in, static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; unsigned long r; @@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2) static inline long epapr_hypercall0(unsigned int nr) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; return epapr_hypercall(in, out, nr); @@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr) static inline long epapr_hypercall1(unsigned int nr, unsigned long p1) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1) static inline long epapr_hypercall2(unsigned int nr, unsigned long p1, unsigned long p2) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1, static inline long epapr_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; @@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) { - unsigned long in[8]; + unsigned long in[8] = {0}; unsigned long out[8]; in[0] = p1; diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index db71448b9bb9..1542c4e009c8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -59,7 +59,7 @@ #endif #define access_ok(type, addr, size) \ - (__chk_user_ptr(addr), \ + (__chk_user_ptr(addr), (void)(type), \ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2405631e91a2..3728e617e17e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -685,6 +685,9 @@ fast_exception_return: mtcr r10 lwz r10,_LINK(r11) mtlr r10 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r11) REST_GPR(10, r11) mtspr SPRN_SRR1,r9 mtspr SPRN_SRR0,r12 @@ -915,6 +918,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtcrf 0xFF,r10 mtlr r11 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r1) /* * Once we put values in SRR0 and SRR1, we are in a state * where exceptions are not recoverable, since taking an @@ -952,6 +958,9 @@ exc_exit_restart_end: mtlr r11 lwz r10,_CCR(r1) mtcrf 0xff,r10 + /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + li r10, 0 + stw r10, 8(r1) REST_2GPRS(9, r1) .globl exc_exit_restart exc_exit_restart: diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index dab616a33b8d..f2197654be07 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,10 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - phb->controller_ops.teardown_msi_irqs(dev); + /* + * We can be called even when arch_setup_msi_irqs() returns -ENOSYS, + * so check the pointer again. + */ + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); } diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index 3d1ecd211776..8137f77abad5 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -26,13 +26,13 @@ #define SS_MSR 0x74 #define SS_SDR1 0x78 #define SS_LR 0x7c -#define SS_SPRG 0x80 /* 4 SPRGs */ -#define SS_DBAT 0x90 /* 8 DBATs */ -#define SS_IBAT 0xd0 /* 8 IBATs */ -#define SS_TB 0x110 -#define SS_CR 0x118 -#define SS_GPREG 0x11c /* r12-r31 */ -#define STATE_SAVE_SIZE 0x16c +#define SS_SPRG 0x80 /* 8 SPRGs */ +#define SS_DBAT 0xa0 /* 8 DBATs */ +#define SS_IBAT 0xe0 /* 8 IBATs */ +#define SS_TB 0x120 +#define SS_CR 0x128 +#define SS_GPREG 0x12c /* r12-r31 */ +#define STATE_SAVE_SIZE 0x17c .section .data .align 5 @@ -103,6 +103,16 @@ _GLOBAL(mpc83xx_enter_deep_sleep) stw r7, SS_SPRG+12(r3) stw r8, SS_SDR1(r3) + mfspr r4, SPRN_SPRG4 + mfspr r5, SPRN_SPRG5 + mfspr r6, SPRN_SPRG6 + mfspr r7, SPRN_SPRG7 + + stw r4, SS_SPRG+16(r3) + stw r5, SS_SPRG+20(r3) + stw r6, SS_SPRG+24(r3) + stw r7, SS_SPRG+28(r3) + mfspr r4, SPRN_DBAT0U mfspr r5, SPRN_DBAT0L mfspr r6, SPRN_DBAT1U @@ -493,6 +503,16 @@ mpc83xx_deep_resume: mtspr SPRN_IBAT7U, r6 mtspr SPRN_IBAT7L, r7 + lwz r4, SS_SPRG+16(r3) + lwz r5, SS_SPRG+20(r3) + lwz r6, SS_SPRG+24(r3) + lwz r7, SS_SPRG+28(r3) + + mtspr SPRN_SPRG4, r4 + mtspr SPRN_SPRG5, r5 + mtspr SPRN_SPRG6, r6 + mtspr SPRN_SPRG7, r7 + lwz r4, SS_SPRG+0(r3) lwz r5, SS_SPRG+4(r3) lwz r6, SS_SPRG+8(r3) diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 352592d3e44e..7fd19a480422 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -104,6 +104,10 @@ unsigned long __init wii_mmu_mapin_mem2(unsigned long top) /* MEM2 64MB@0x10000000 */ delta = wii_hole_start + wii_hole_size; size = top - delta; + + if (__map_without_bats) + return delta; + for (bl = 128<<10; bl < max_size; bl <<= 1) { if (bl * 2 > size) break; diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 44ed78af1a0d..9021b7272889 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -92,7 +92,7 @@ out: } static struct bin_attribute opal_msglog_attr = { - .attr = {.name = "msglog", .mode = 0444}, + .attr = {.name = "msglog", .mode = 0400}, .read = opal_msglog_read }; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 96536c969c9c..a8efed3b4691 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -280,6 +280,8 @@ int dlpar_detach_node(struct device_node *dn) if (rc) return rc; + of_node_put(dn); + return 0; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8eccead675d4..cc7b450a7766 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -224,10 +224,10 @@ static noinline __init void detect_machine_type(void) if (stsi(vmms, 3, 2, 2) || !vmms->count) return; - /* Running under KVM? If not we assume z/VM */ + /* Detect known hypervisors */ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; - else + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147e07b4..1b69bfdf59f9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -344,6 +344,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e7a43a30e3ff..47692c78d09c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -833,6 +833,8 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running under KVM in 64-bit mode\n"); else if (MACHINE_IS_LPAR) pr_info("Linux is running natively in 64-bit mode\n"); + else + pr_info("Linux is running as a guest in 64-bit mode\n"); /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 77f4f334a465..29e5409c0d48 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -360,9 +360,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data) */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { + struct _lowcore *lc = pcpu_devices->lowcore; + + if (pcpu_devices[0].address == stap()) + lc = &S390_lowcore; + pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->lowcore->panic_stack - - PANIC_FRAME_OFFSET + PAGE_SIZE); + lc->panic_stack - PANIC_FRAME_OFFSET + PAGE_SIZE); } int smp_find_processor_id(u16 address) @@ -1152,7 +1156,11 @@ static ssize_t __ref rescan_store(struct device *dev, { int rc; + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; rc = smp_rescan_cpus(); + unlock_device_hotplug(); return rc ? rc : count; } static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 12bbf0e8478f..7ad41be8b373 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -242,7 +242,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + nr_pages - nr, pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index e7af6a65baab..8c51a0e94854 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -258,7 +258,8 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 2e5c4fc2daa9..150f48303fb0 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -250,7 +250,8 @@ slow: pages += nr; ret = get_user_pages_unlocked(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 18eb9924dda3..aeb430212947 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -197,12 +197,17 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - pte_clear_bits(pte, _PAGE_RW); + if (likely(pte_get_bits(pte, _PAGE_RW))) + pte_clear_bits(pte, _PAGE_RW); + else + return pte; return(pte_mknewprot(pte)); } static inline pte_t pte_mkread(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_USER))) + return pte; pte_set_bits(pte, _PAGE_USER); return(pte_mknewprot(pte)); } @@ -221,6 +226,8 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { + if (unlikely(pte_get_bits(pte, _PAGE_RW))) + return pte; pte_set_bits(pte, _PAGE_RW); return(pte_mknewprot(pte)); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 323f8df55e80..e01063a1875a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1993,14 +1993,8 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y depends on SMP - ---help--- - Say Y here to allow turning CPUs off and on. CPUs can be - controlled through /sys/devices/system/cpu. - ( Note: power management support will enable this option - automatically on SMP systems. ) - Say N if you want to disable CPU hotplug. config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 6171d9153a33..3f54dfba7a0b 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -107,7 +107,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE AFLAGS_header.o += -I$(obj) $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h -LDFLAGS_setup.elf := -T +LDFLAGS_setup.elf := -m elf_i386 -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 31dab2135188..21332b431f10 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -25,8 +25,8 @@ static inline u16 i8254(void) u16 status, timer; do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0, + I8254_PORT_CONTROL); status = inb(I8254_PORT_COUNTER0); timer = inb(I8254_PORT_COUNTER0); timer |= inb(I8254_PORT_COUNTER0) << 8; diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index dde67eacd8e2..36efc448ab0e 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -254,7 +254,6 @@ CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y -CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 8e4a038e9ac2..faa0479f17bd 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -191,6 +191,8 @@ CONFIG_NET_CLS_U32=y CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_U32=y CONFIG_NET_CLS_ACT=y +CONFIG_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_RFKILL=y @@ -223,7 +225,6 @@ CONFIG_DM_MIRROR=y CONFIG_DM_ZERO=y CONFIG_DM_UEVENT=y CONFIG_DM_VERITY=y -CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE=1 CONFIG_DM_VERITY_FEC=y CONFIG_DM_ANDROID_VERITY=y CONFIG_NETDEVICES=y @@ -263,7 +264,6 @@ CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_MISC=y -CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO_I8042 is not set @@ -305,6 +305,12 @@ CONFIG_DRM=y CONFIG_DRM_VIRTIO_GPU=y CONFIG_SOUND=y CONFIG_SND=y +CONFIG_SND_HRTIMER=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_VERBOSE_PROCFS is not set +# CONFIG_SND_DRIVERS is not set +CONFIG_SND_INTEL8X0=y +# CONFIG_SND_USB is not set CONFIG_HIDRAW=y CONFIG_UHID=y CONFIG_HID_A4TECH=y @@ -377,9 +383,9 @@ CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y CONFIG_USB_CONFIGFS_UEVENT=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_RTC_CLASS=y -# CONFIG_RTC_HCTOSYS is not set CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_STAGING=y @@ -409,6 +415,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=y +CONFIG_OVERLAY_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -440,6 +447,7 @@ CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_DEBUG_SET_MODULE_RONX=y CONFIG_IO_DELAY_NONE=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index 5cf2450842ab..6c983f2262d3 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -281,7 +281,6 @@ CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y -CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO is not set diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 8baaff5af0b5..75b9d43069f1 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -77,6 +77,7 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index eff2f414e22b..ec234c43b3f4 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -321,6 +321,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -359,16 +365,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 338c748054ed..639d9760b089 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -251,16 +251,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -518,6 +518,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -556,16 +562,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 32acb36d0a9a..c2962dac577b 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -44,10 +44,8 @@ targets += $(vdso_img_sodbg) export CPPFLAGS_vdso.lds += -P -C -VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ - -Wl,--no-undefined \ - -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ - $(DISABLE_LTO) +VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ + -z max-page-size=4096 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) @@ -93,10 +91,8 @@ CFLAGS_REMOVE_vvar.o = -pg # CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ - -Wl,-soname=linux-vdso.so.1 \ - -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ + -z max-page-size=4096 # 64-bit objects to re-brand as x32 vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) @@ -124,7 +120,7 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 +VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 # This makes sure the $(obj) subdirectory exists even though vdso32/ # is not a kbuild sub-make subdirectory. @@ -160,14 +156,13 @@ $(obj)/vdso32.so.dbg: FORCE \ # The DSO images are built using a special linker script. # quiet_cmd_vdso = VDSO $@ - cmd_vdso = $(CC) -nostdlib -o $@ \ + cmd_vdso = $(LD) -nostdlib -o $@ \ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ + -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ - $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) \ - $(filter --target=% --gcc-toolchain=%,$(KBUILD_CFLAGS)) +VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ + $(call ld-option, --build-id) -Bsymbolic GCOV_PROFILE := n # diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index ae6aad1d24f7..b348c4641312 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -50,7 +50,7 @@ static unsigned long get_dr(int n) /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs *regs, struct user32 *dump) +static void fill_dump(struct pt_regs *regs, struct user32 *dump) { u32 fs, gs; memset(dump, 0, sizeof(*dump)); @@ -156,10 +156,12 @@ static int aout_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; + + fill_dump(cprm->regs, &dump); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); dump.u_ar0 = offsetof(struct user32, regs); dump.signal = cprm->siginfo->si_signo; - dump_thread32(cprm->regs, &dump); /* * If the size of the dump file exceeds the rlimit, then see diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 16825dda18dc..66a5e60f60c4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -94,6 +94,9 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) \ ({ \ int err; \ + \ + might_fault(); \ + \ asm volatile(ASM_STAC "\n" \ "1:" #insn "\n\t" \ "2: " ASM_CLAC "\n" \ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a37cdbdfbaa..2cb49ac1b2b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -765,7 +765,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); /* Create, but do not attach this VCPU */ @@ -1200,7 +1200,7 @@ asmlinkage void kvm_spurious_fault(void); "cmpb $0, kvm_rebooting \n\t" \ "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ - "call kvm_spurious_fault \n\t" \ + "jmp kvm_spurious_fault \n\t" \ ".popsection \n\t" \ _ASM_EXTABLE(666b, 667b) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 4928cf0d5af0..fb1251946b45 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -2,7 +2,11 @@ #define _ASM_X86_PAGE_64_DEFS_H #ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 561be63b61ab..c14f699f5c36 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -170,17 +170,6 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); } -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8857f6f4daa9..9958e4b32f41 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -151,6 +151,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -314,8 +319,7 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ - errret); \ + __put_user_asm_u64(x, ptr, retval, errret); \ break; \ default: \ __put_user_bad(); \ @@ -426,8 +430,10 @@ do { \ #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ + __typeof__(*(ptr)) __pu_val; \ + __pu_val = x; \ __uaccess_begin(); \ - __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ + __put_user_size(__pu_val, (ptr), (size), __pu_err, -EFAULT);\ __uaccess_end(); \ __builtin_expect(__pu_err, 0); \ }) @@ -436,7 +442,7 @@ do { \ ({ \ int __gu_err; \ unsigned long __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -478,6 +484,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current_thread_info()->uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ @@ -542,7 +552,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -577,7 +587,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 7d3bdd1ed697..d21eb05b16eb 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -33,11 +33,47 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero * the specified block with access_ok() before calling this function. * The caller should also make sure he pins the user space address * so that we don't result in page fault and sleep. + * + * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault + * we return the initial request size (1, 2 or 4), as copy_*_user should do. + * If a store crosses a page boundary and gets a fault, the x86 will not write + * anything, so this is accurate. */ + static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { check_object_size(from, n, true); + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __uaccess_begin_nospec(); + __put_user_size(*(u8 *)from, (u8 __user *)to, + 1, ret, 1); + __uaccess_end(); + return ret; + case 2: + __uaccess_begin_nospec(); + __put_user_size(*(u16 *)from, (u16 __user *)to, + 2, ret, 2); + __uaccess_end(); + return ret; + case 4: + __uaccess_begin_nospec(); + __put_user_size(*(u32 *)from, (u32 __user *)to, + 4, ret, 4); + __uaccess_end(); + return ret; + case 8: + __uaccess_begin_nospec(); + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + __uaccess_end(); + return ret; + } + } return __copy_to_user_ll(to, from, n); } @@ -66,6 +102,32 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { + /* Avoid zeroing the tail if the copy fails.. + * If 'n' is constant and 1, 2, or 4, we do still zero on a failure, + * but as the zeroing behaviour is only significant when n is not + * constant, that shouldn't be a problem. + */ + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __uaccess_begin_nospec(); + __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); + return ret; + case 2: + __uaccess_begin_nospec(); + __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); + return ret; + case 4: + __uaccess_begin_nospec(); + __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); + return ret; + } + } return __copy_from_user_ll_nozero(to, from, n); } @@ -102,17 +164,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; @@ -130,17 +192,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index ec9d2bcc8c24..411b6156bfeb 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -58,31 +58,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -92,7 +92,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) @@ -192,7 +192,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) switch (size) { case 1: { u8 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) @@ -203,7 +203,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 2: { u16 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) @@ -215,7 +215,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) case 4: { u32 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) @@ -226,7 +226,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 8: { u64 tmp; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(tmp, (u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 0977e7607046..5937b112ebc2 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -215,6 +215,9 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + stac(); asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9f6151884249..e94e6f16172b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -716,11 +716,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. - */ - if (c->x86_model <= 1 && c->x86_mask <= 1) + + /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b12c0287d6cf..e8b46f575306 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -693,7 +693,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) set_cpu_cap(c, X86_FEATURE_SSBD); if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 15e47c1cd412..6e4e4191abb5 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -121,7 +121,7 @@ static void set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -132,11 +132,11 @@ static void set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -150,14 +150,14 @@ static void geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -292,7 +292,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -315,7 +315,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { /* A 6x86MX - it has the bug. */ set_cpu_bug(c, X86_BUG_COMA); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7b8c8c838191..77f7580e22c6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -670,6 +670,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, } if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + m->bank = i; *msg = tmp; ret = 1; } diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index d76f13d6d8d6..ec894bf5eeb0 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -173,6 +173,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; + memset(&gentry, 0, sizeof(gentry)); + switch (cmd) { case MTRRIOC_ADD_ENTRY: case MTRRIOC_SET_ENTRY: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index f0f4fcba252e..947579425861 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1081,6 +1081,8 @@ static struct pci_driver snbep_uncore_pci_driver = { .id_table = snbep_uncore_pci_ids, }; +#define NODE_ID_MASK 0x7 + /* * build pci bus to socket mapping */ @@ -1102,7 +1104,7 @@ static int snbep_pci2phy_map_init(int devid) err = pci_read_config_dword(ubox_dev, 0x40, &config); if (err) break; - nodeid = config; + nodeid = config & NODE_ID_MASK; /* get the Node ID mapping */ err = pci_read_config_dword(ubox_dev, 0x54, &config); if (err) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3fdc1e53aaac..9cce5504a5c7 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -825,6 +825,8 @@ int __init hpet_enable(void) return 0; hpet_set_mapping(); + if (!hpet_virt_address) + return 0; /* * Read the period and check for a sane value: diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 2bcfb5f2bc44..433f17d154e2 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -351,6 +351,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) #endif default: WARN_ON_ONCE(1); + return -EINVAL; } /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 0f8a6bbaaa44..0bf17576dd2a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -168,6 +168,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 581671172e10..ddf968b21d6c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -542,6 +542,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -743,15 +744,21 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -773,8 +780,25 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -792,6 +816,8 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index bcc06e82a593..579f8f813ce0 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c @@ -41,8 +41,8 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, int ret, numpages, size = 4; bool readonly; unsigned long val; - unsigned long core = (unsigned long)mod->core_layout.base; - unsigned long core_size = mod->core_layout.size; + unsigned long core = (unsigned long)mod->module_core; + unsigned long core_size = mod->core_size; switch (type) { case R_X86_64_NONE: @@ -58,6 +58,7 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, val = (s32)value; break; case R_X86_64_PC32: + case R_X86_64_PLT32: val = (u32)(value - loc); break; default: @@ -72,7 +73,7 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, readonly = false; #ifdef CONFIG_DEBUG_SET_MODULE_RONX - if (loc < core + mod->core_layout.ro_size) + if (loc < core + mod->core_ro_size) readonly = true; #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 2ddb850bceab..13d6b8ac0b0b 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -539,48 +539,3 @@ overflow: return -ENOEXEC; } #endif /* CONFIG_KEXEC_FILE */ - -static int -kexec_mark_range(unsigned long start, unsigned long end, bool protect) -{ - struct page *page; - unsigned int nr_pages; - - /* - * For physical range: [start, end]. We must skip the unassigned - * crashk resource with zero-valued "end" member. - */ - if (!end || start > end) - return 0; - - page = pfn_to_page(start >> PAGE_SHIFT); - nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; - if (protect) - return set_pages_ro(page, nr_pages); - else - return set_pages_rw(page, nr_pages); -} - -static void kexec_mark_crashkres(bool protect) -{ - unsigned long control; - - kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect); - - /* Don't touch the control code page used in crash_kexec().*/ - control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); - /* Control code page is located in the 2nd page. */ - kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); - control += KEXEC_CONTROL_PAGE_SIZE; - kexec_mark_range(control, crashk_res.end, protect); -} - -void arch_kexec_protect_crashkres(void) -{ - kexec_mark_crashkres(true); -} - -void arch_kexec_unprotect_crashkres(void) -{ - kexec_mark_crashkres(false); -} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3611136b1455..6c6bc7f4ba8a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -363,7 +363,7 @@ SECTIONS * Per-cpu symbols which need to be offset from __per_cpu_load * for the boot processor. */ -#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load +#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 338d13d4fd2f..b857bb9f6f23 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -341,6 +341,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + /* cpuid 0xC0000001.edx */ const u32 kvm_supported_word5_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -358,6 +362,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word10_x86_features = F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -435,11 +443,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, cpuid_mask(&entry->ebx, 9); // TSC_ADJUST is emulated entry->ebx |= F(TSC_ADJUST); - } else + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); + } else { entry->ebx = 0; + entry->edx = 0; + } entry->eax = 0; entry->ecx = 0; - entry->edx = 0; break; } case 9: @@ -583,7 +594,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d1534feefcfe..72f159f4d456 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -159,6 +159,46 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_RDTSCP)); } +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + +static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD))); +} + +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + +static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD)); +} + + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f1507626ed36..5dd56e3517f3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2567,15 +2567,13 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); if (emulator_has_longmode(ctxt)) { struct desc_struct cs_desc; /* Zero CR4.PCIDE before CR0.PG. */ - if (cr4 & X86_CR4_PCIDE) { + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - cr4 &= ~X86_CR4_PCIDE; - } /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); @@ -2589,13 +2587,16 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } smbase = ctxt->ops->get_smbase(ctxt); if (emulator_has_longmode(ctxt)) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a1afd80a68aa..3c70f6c76d3a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -56,7 +56,7 @@ #define APIC_BUS_CYCLE_NS 1 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) #define APIC_LVT_NUM 6 /* 14 is the version for Xeon and Pentium 8.4.8*/ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index df7827a981dd..acbde1249b6f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -147,6 +148,14 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -182,6 +191,8 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -411,6 +422,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -762,6 +774,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1120,6 +1151,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -1210,11 +1244,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1239,6 +1279,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -2344,6 +2388,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) kvm_mmu_reset_context(&svm->vcpu); kvm_mmu_load(&svm->vcpu); + /* + * Drop what we picked up for L2 via svm_complete_interrupts() so it + * doesn't end up in L1. + */ + svm->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&svm->vcpu); + kvm_clear_interrupt_queue(&svm->vcpu); + return 0; } @@ -3051,6 +3103,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3125,6 +3191,59 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -3811,6 +3930,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -3915,6 +4042,26 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif #endif + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + reload_tss(vcpu); local_irq_disable(); @@ -4015,8 +4162,15 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { + switch (index) { + case MSR_IA32_MCG_EXT_CTL: + return false; + default: + break; + } + return true; } @@ -4299,7 +4453,7 @@ static struct kvm_x86_ops svm_x86_ops = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c5a4b1978cbf..098be61a6b4c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "trace.h" @@ -109,6 +110,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) @@ -172,7 +181,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -189,6 +197,7 @@ struct loaded_vmcs { struct vmcs *vmcs; int cpu; int launched; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -205,7 +214,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -384,13 +393,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -412,16 +414,16 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -531,6 +533,7 @@ struct vcpu_vmx { unsigned long host_rsp; u8 fail; bool nmi_known_unmasked; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -542,6 +545,10 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + + u64 arch_capabilities; + u64 spec_ctrl; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -889,6 +896,9 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -908,11 +918,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; -static unsigned long *vmx_msr_bitmap_legacy; -static unsigned long *vmx_msr_bitmap_longmode; -static unsigned long *vmx_msr_bitmap_legacy_x2apic; -static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -1689,6 +1694,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2074,6 +2125,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (vmx->loaded_vmcs->cpu != cpu) { @@ -2353,27 +2405,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; - else if (vcpu->arch.apic_base & X2APIC_ENABLE) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2414,7 +2445,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -2828,6 +2859,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -2927,6 +2971,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -3352,11 +3458,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3372,6 +3473,34 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); +} + +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } + return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4370,10 +4499,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4407,8 +4534,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4488,37 +4615,78 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); } -static void vmx_enable_intercept_msr_read_x2apic(u32 msr) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_read_x2apic(u32 msr) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_ID), MSR_TYPE_R); + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } + } } -static void vmx_disable_intercept_msr_write_x2apic(u32 msr) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_W); + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) @@ -4526,6 +4694,28 @@ static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) return enable_apicv && lapic_in_kernel(vcpu); } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4533,18 +4723,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); - - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4556,6 +4743,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4818,7 +5007,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -4890,6 +5079,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); @@ -4918,6 +5109,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->soft_vnmi_blocked = 0; @@ -5382,6 +5574,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) static int handle_triple_fault(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; return 0; } @@ -5973,9 +6166,24 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { - skip_emulated_instruction(vcpu); trace_kvm_fast_mmio(gpa); - return 1; + /* + * Doing kvm_skip_emulated_instruction() depends on undefined + * behavior: Intel's manual doesn't mandate + * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG + * occurs and while on real hardware it was observed to be set, + * other hypervisors (namely Hyper-V) don't set it, we end up + * advancing IP with some random value. Disable fast mmio when + * running nested and keep it for real hardware in hope that + * VM_EXIT_INSTRUCTION_LEN will always be set correctly. + */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) { + skip_emulated_instruction(vcpu); + return 1; + } + else + return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, + NULL, 0) == EMULATE_DONE; } ret = handle_mmio_page_fault(vcpu, gpa, true); @@ -6159,7 +6367,7 @@ static void wakeup_handler(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6174,38 +6382,13 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out3; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out4; - - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out6; + goto out1; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out7; + goto out2; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); @@ -6214,14 +6397,9 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out8; + goto out3; } if (boot_cpu_has(X86_FEATURE_NX)) @@ -6287,38 +6465,8 @@ static __init int hardware_setup(void) kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - if (enable_apicv) { - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr); - - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f); - } - if (enable_ept) { kvm_mmu_set_mask_ptes(0ull, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, @@ -6349,21 +6497,10 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out8: - free_page((unsigned long)vmx_vmwrite_bitmap); -out7: - free_page((unsigned long)vmx_vmread_bitmap); -out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out4: - free_page((unsigned long)vmx_msr_bitmap_longmode); out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_vmwrite_bitmap); out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_vmread_bitmap); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6374,16 +6511,10 @@ out: static __exit void hardware_unsetup(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6426,93 +6557,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_entry(vmx->nested.vmcs02_pool.prev, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -6613,6 +6657,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Addr = segment_base + offset */ /* offset = base + [index * scale] + displacement */ off = exit_qualification; /* holds the displacement */ + if (addr_size == 1) + off = (gva_t)sign_extend64(off, 31); + else if (addr_size == 0) + off = (gva_t)sign_extend64(off, 15); if (base_is_valid) off += kvm_register_read(vcpu, base_reg); if (index_is_valid) @@ -6655,10 +6703,16 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); - /* Protected mode: #GP(0)/#SS(0) if the memory - * operand is outside the segment limit. + + /* + * Protected mode: #GP(0)/#SS(0) if the memory operand is + * outside the segment limit. All CPUs that support VMX ignore + * limit checks for flat segments, i.e. segments with base==0, + * limit==0xffffffff and of type expand-up data or code. */ - exn = exn || (off + sizeof(u64) > s.limit); + if (!(s.base == 0 && s.limit == 0xffffffff && + ((s.type & 8) || !(s.type & 4)))) + exn = exn || (off + sizeof(u64) > s.limit); } if (exn) { kvm_queue_exception_e(vcpu, @@ -6786,6 +6840,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + int r; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -6825,10 +6880,14 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) + goto out_vmcs02; + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); if (!shadow_vmcs) - return -ENOMEM; + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -6836,9 +6895,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->nested.current_shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -6850,6 +6906,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: + return -ENOMEM; } /* @@ -6916,12 +6978,13 @@ static void free_nested(struct vcpu_vmx *vmx) if (!vmx->nested.vmxon) return; + hrtimer_cancel(&vmx->nested.preemption_timer); vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -6937,7 +7000,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -6971,8 +7034,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; @@ -7757,6 +7818,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; @@ -8244,7 +8318,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -8413,9 +8487,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -8607,7 +8693,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + vmx->__launched = vmx->loaded_vmcs->launched; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -8725,6 +8820,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -8824,6 +8939,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -8856,16 +8972,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - if (!vmx->loaded_vmcs->vmcs) - goto free_msrs; if (!vmm_exclusive) kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); - loaded_vmcs_init(vmx->loaded_vmcs); + err = alloc_loaded_vmcs(&vmx->vmcs01); if (!vmm_exclusive) kvm_cpu_vmxoff(); + if (err < 0) + goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; @@ -9248,9 +9372,26 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd && !spec_ctrl) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9258,59 +9399,46 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); + msr_bitmap_l1 = (unsigned long *)kmap(page); + + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } + + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); nested_release_page_clean(page); @@ -9729,10 +9857,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* @@ -9784,6 +9912,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -9876,7 +10007,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - struct loaded_vmcs *vmcs02; bool ia32e; u32 msr_entry_idx; @@ -10016,10 +10146,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); @@ -10028,7 +10154,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; + vmx->loaded_vmcs = &vmx->nested.vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -10489,7 +10615,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) @@ -10540,10 +10666,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - load_vmcs12_host_state(vcpu, vmcs12); /* Update TSC_OFFSET if TSC was changed while L2 ran */ @@ -10871,7 +10993,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e6ab034f0bc7..706c5d63a53f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -961,6 +961,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; @@ -984,6 +985,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, MSR_IA32_SMBASE, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2583,7 +2585,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; @@ -4072,14 +4074,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -4251,6 +4247,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } @@ -5440,7 +5443,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, * handle watchpoints yet, those would be handled in * the emulate_ops. */ - if (kvm_vcpu_check_breakpoint(vcpu, &r)) + if (!(emulation_type & EMULTYPE_SKIP) && + kvm_vcpu_check_breakpoint(vcpu, &r)) return r; ctxt->interruptibility = 0; @@ -5527,8 +5531,7 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && - (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + if (r == EMULATE_DONE && ctxt->tf) kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) @@ -6475,6 +6478,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; r = 0; goto out; } diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 91d93b95bd86..0a6fcae404f8 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,12 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -583,12 +583,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -596,13 +596,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -610,7 +610,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -619,7 +619,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index ae9a37bf1371..7d2542ad346a 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -388,7 +388,7 @@ slow_irqon: ret = get_user_pages_unlocked(current, mm, start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 7ed47b1e6f42..7e94fc6f608a 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -536,10 +536,9 @@ static int mpx_resolve_fault(long __user *addr, int write) { long gup_ret; int nr_pages = 1; - int force = 0; gup_ret = get_user_pages(current, current->mm, (unsigned long)addr, - nr_pages, write, force, NULL, NULL); + nr_pages, write ? FOLL_WRITE : 0, NULL, NULL); /* * get_user_pages() returns number of pages gotten. * 0 means we failed to fault in and get anything, diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 526536c81ddc..ca1e8e6dccc8 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -50,8 +50,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) word1 = read_pci_config_16(bus, slot, func, 0xc0); word2 = read_pci_config_16(bus, slot, func, 0xc2); if (word1 != word2) { - res.start = (word1 << 16) | 0x0000; - res.end = (word2 << 16) | 0xffff; + res.start = ((resource_size_t) word1 << 16) | 0x0000; + res.end = ((resource_size_t) word2 << 16) | 0xffff; res.flags = IORESOURCE_MEM; update_res(info, res.start, res.end, res.flags, 0); } diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 524142117296..82324fc25d5e 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -179,7 +179,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) num--; } - if (efi_x >= si->lfb_width) { + if (efi_x + font->width > si->lfb_width) { efi_x = 0; efi_y += font->height; } diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 59610be05468..e8fb26502b55 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -46,7 +46,7 @@ $(obj)/pasyms.h: $(REALMODE_OBJS) FORCE targets += realmode.lds $(obj)/realmode.lds: $(obj)/pasyms.h -LDFLAGS_realmode.elf := --emit-relocs -T +LDFLAGS_realmode.elf := -m elf_i386 --emit-relocs -T CPPFLAGS_realmode.lds += -P -C -I$(obj) targets += realmode.elf diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 22eeacba37cc..199e05f85e89 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -35,6 +35,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug" diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index c7b3bedbfffe..e3823b4f9d08 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -286,12 +286,13 @@ should_never_return: movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -328,11 +329,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 4d02e38514f5..54bb8e0473a0 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -80,7 +80,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i; - for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); } @@ -93,6 +93,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id); + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); } @@ -192,9 +197,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i; #ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1); @@ -203,18 +210,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount); - cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount); - while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout)); - if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -234,6 +244,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack); + init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -295,8 +306,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; } diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 7538d802b65a..483593068139 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -272,10 +272,14 @@ static int return_address_cb(struct stackframe *frame, void *data) return 1; } +/* + * level == 0 is for the return address from the caller of this function, + * not from this function itself. + */ unsigned long return_address(unsigned level) { struct return_addr_data r = { - .skip = level + 1, + .skip = level, }; walk_stackframe(stack_pointer(NULL), return_address_cb, &r); return r.addr; diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index b9ad9feadc2d..a992cb6a47db 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -87,7 +87,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt); if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0; diff --git a/block/bio.c b/block/bio.c index 63363a689922..cf513f74dffd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1216,8 +1216,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q, } } - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) + if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (!map_data) + __free_page(page); break; + } len -= bytes; offset = 0; diff --git a/block/blk-settings.c b/block/blk-settings.c index c7bb666aafd1..7c1e07165e3c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -249,6 +249,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); limits->max_sectors = max_sectors; + q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e140cc487ce1..68155dd21910 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -213,6 +213,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) spin_lock_irq(q->queue_lock); q->limits.max_sectors = max_sectors_kb << 1; + q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); spin_unlock_irq(q->queue_lock); return ret; diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 index e36bae42443e..6bf6755ab3a8 100644 --- a/build.config.cuttlefish.aarch64 +++ b/build.config.cuttlefish.aarch64 @@ -6,7 +6,7 @@ DEFCONFIG=cuttlefish_defconfig EXTRA_CMDS='' KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin FILES=" arch/arm64/boot/Image.gz diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 index 4c064a00681e..e3477c3b3cbe 100644 --- a/build.config.cuttlefish.x86_64 +++ b/build.config.cuttlefish.x86_64 @@ -6,10 +6,11 @@ DEFCONFIG=x86_64_cuttlefish_defconfig EXTRA_CMDS='' KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin FILES=" arch/x86/boot/bzImage vmlinux System.map " +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm new file mode 100644 index 000000000000..ff5646ab4f40 --- /dev/null +++ b/build.config.goldfish.arm @@ -0,0 +1,13 @@ +ARCH=arm +BRANCH=android-4.4 +CROSS_COMPILE=arm-linux-androidkernel- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin +FILES=" +arch/arm/boot/zImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64 new file mode 100644 index 000000000000..4c896a679ab9 --- /dev/null +++ b/build.config.goldfish.arm64 @@ -0,0 +1,13 @@ +ARCH=arm64 +BRANCH=android-4.4 +CROSS_COMPILE=aarch64-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips new file mode 100644 index 000000000000..9a14a444ac14 --- /dev/null +++ b/build.config.goldfish.mips @@ -0,0 +1,12 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 new file mode 100644 index 000000000000..6ad9759f5f4a --- /dev/null +++ b/build.config.goldfish.mips64 @@ -0,0 +1,12 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 new file mode 100644 index 000000000000..2266c621835e --- /dev/null +++ b/build.config.goldfish.x86 @@ -0,0 +1,13 @@ +ARCH=x86 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=i386_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 new file mode 100644 index 000000000000..08c42c2eba03 --- /dev/null +++ b/build.config.goldfish.x86_64 @@ -0,0 +1,13 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=x86_64_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 diff --git a/crypto/ahash.c b/crypto/ahash.c index 6978ad86e516..595c4f3657ff 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -85,17 +85,17 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) { unsigned int alignmask = walk->alignmask; - unsigned int nbytes = walk->entrylen; walk->data -= walk->offset; - if (nbytes && walk->offset & alignmask && !err) { - walk->offset = ALIGN(walk->offset, alignmask + 1); - nbytes = min(nbytes, - ((unsigned int)(PAGE_SIZE)) - walk->offset); - walk->entrylen -= nbytes; + if (walk->entrylen && (walk->offset & alignmask) && !err) { + unsigned int nbytes; + walk->offset = ALIGN(walk->offset, alignmask + 1); + nbytes = min(walk->entrylen, + (unsigned int)(PAGE_SIZE - walk->offset)); if (nbytes) { + walk->entrylen -= nbytes; walk->data += walk->offset; return nbytes; } @@ -115,7 +115,7 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) if (err) return err; - if (nbytes) { + if (walk->entrylen) { walk->offset = 0; walk->pg++; return hash_walk_next(walk); diff --git a/crypto/authenc.c b/crypto/authenc.c index b7290c5b1eaa..5c25005ff398 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -58,14 +58,22 @@ int crypto_authenc_extractkeys(struct crypto_authenc_keys *keys, const u8 *key, return -EINVAL; if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) return -EINVAL; - if (RTA_PAYLOAD(rta) < sizeof(*param)) + + /* + * RTA_OK() didn't align the rtattr's payload when validating that it + * fits in the buffer. Yet, the keys should start on the next 4-byte + * aligned boundary. To avoid confusion, require that the rtattr + * payload be exactly the param struct, which has a 4-byte aligned size. + */ + if (RTA_PAYLOAD(rta) != sizeof(*param)) return -EINVAL; + BUILD_BUG_ON(sizeof(*param) % RTA_ALIGNTO); param = RTA_DATA(rta); keys->enckeylen = be32_to_cpu(param->enckeylen); - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); + key += rta->rta_len; + keylen -= rta->rta_len; if (keylen < keys->enckeylen) return -EINVAL; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index fa0c4567f697..5fdf3e532310 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -276,7 +276,7 @@ static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, struct aead_request *req = areq->data; err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); - aead_request_complete(req, err); + authenc_esn_request_complete(req, err); } static int crypto_authenc_esn_decrypt(struct aead_request *req) diff --git a/crypto/cts.c b/crypto/cts.c index e467ec0acf9f..e65688d6a4ca 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -137,8 +137,8 @@ static int crypto_cts_encrypt(struct blkcipher_desc *desc, lcldesc.info = desc->info; lcldesc.flags = desc->flags; - if (tot_blocks == 1) { - err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, bsize); + if (tot_blocks <= 1) { + err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, nbytes); } else if (nbytes <= bsize * 2) { err = cts_cbc_encrypt(ctx, desc, dst, src, 0, nbytes); } else { @@ -232,8 +232,8 @@ static int crypto_cts_decrypt(struct blkcipher_desc *desc, lcldesc.info = desc->info; lcldesc.flags = desc->flags; - if (tot_blocks == 1) { - err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, bsize); + if (tot_blocks <= 1) { + err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, nbytes); } else if (nbytes <= bsize * 2) { err = cts_cbc_decrypt(ctx, desc, dst, src, 0, nbytes); } else { diff --git a/crypto/pcbc.c b/crypto/pcbc.c index f654965f0933..de81f716cf26 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -52,7 +52,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; do { crypto_xor(iv, src, bsize); @@ -76,7 +76,7 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; u8 tmpbuf[bsize]; do { @@ -89,8 +89,6 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, src += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } @@ -130,7 +128,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; do { fn(crypto_cipher_tfm(tfm), dst, src); @@ -142,8 +140,6 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, dst += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } @@ -156,7 +152,7 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; + u8 * const iv = walk->iv; u8 tmpbuf[bsize]; do { @@ -169,8 +165,6 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, src += bsize; } while ((nbytes -= bsize) >= bsize); - memcpy(walk->iv, iv, bsize); - return nbytes; } diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9aad61534246..98ebaf79680a 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3494,7 +3494,49 @@ static struct hash_testvec poly1305_tv_template[] = { .psize = 80, .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00", - }, + }, { /* Regression test for overflow in AVX2 implementation */ + .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff", + .psize = 300, + .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" + "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", + } }; /* diff --git a/drivers/Makefile b/drivers/Makefile index 082c6f0c158d..ee62edb593f1 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ +obj-$(CONFIG_VHOST) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 1521d9a41d25..a899a7abcf63 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -202,11 +202,15 @@ static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; + acpi_status status; int len, count; int i, nval; char *c; - acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + if (ACPI_FAILURE(status)) + return -ENODEV; + /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 1c2b846c5776..f28b4949cb9d 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -131,6 +131,23 @@ void acpi_power_resources_list_free(struct list_head *list) } } +static bool acpi_power_resource_is_dup(union acpi_object *package, + unsigned int start, unsigned int i) +{ + acpi_handle rhandle, dup; + unsigned int j; + + /* The caller is expected to check the package element types */ + rhandle = package->package.elements[i].reference.handle; + for (j = start; j < i; j++) { + dup = package->package.elements[j].reference.handle; + if (dup == rhandle) + return true; + } + + return false; +} + int acpi_extract_power_resources(union acpi_object *package, unsigned int start, struct list_head *list) { @@ -150,6 +167,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, err = -ENODEV; break; } + + /* Some ACPI tables contain duplicate power resource references */ + if (acpi_power_resource_is_dup(package, start, i)) + continue; + err = acpi_add_power_resource(rhandle); if (err) break; diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index cb3dedb1beae..b133dac8a7f2 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -443,9 +443,13 @@ static int acpi_ac_get_present(struct acpi_sbs *sbs) /* * The spec requires that bit 4 always be 1. If it's not set, assume - * that the implementation doesn't support an SBS charger + * that the implementation doesn't support an SBS charger. + * + * And on some MacBooks a status of 0xffff is always returned, no + * matter whether the charger is plugged in or not, which is also + * wrong, so ignore the SBS charger for those too. */ - if (!((status >> 4) & 0x1)) + if (!((status >> 4) & 0x1) || status == 0xffff) return -ENODEV; sbs->charger_present = (status >> 15) & 0x1; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ddccbcadcb63..c58ec06f6a40 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -360,6 +360,7 @@ struct binder_error { * @min_priority: minimum scheduling priority * (invariant after initialized) * @inherit_rt: inherit RT scheduling policy from caller + * @txn_security_ctx: require sender's security context * (invariant after initialized) * @async_todo: list of async work items * (protected by @proc->inner_lock) @@ -399,6 +400,7 @@ struct binder_node { u8 sched_policy:2; u8 inherit_rt:1; u8 accept_fds:1; + u8 txn_security_ctx:1; u8 min_priority; }; bool has_async_transaction; @@ -600,6 +602,8 @@ enum { * (protected by @proc->inner_lock) * @todo: list of work to do for this thread * (protected by @proc->inner_lock) + * @process_todo: whether work in @todo should be processed + * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread @@ -626,6 +630,7 @@ struct binder_thread { bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; + bool process_todo; struct binder_error return_error; struct binder_error reply_error; wait_queue_head_t wait; @@ -653,6 +658,7 @@ struct binder_transaction { struct binder_priority saved_priority; bool set_priority_called; kuid_t sender_euid; + binder_uintptr_t security_ctx; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -813,6 +819,16 @@ static bool binder_worklist_empty(struct binder_proc *proc, return ret; } +/** + * binder_enqueue_work_ilocked() - Add an item to the work list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + * + * Requires the proc->inner_lock to be held. + */ static void binder_enqueue_work_ilocked(struct binder_work *work, struct list_head *target_list) @@ -823,22 +839,56 @@ binder_enqueue_work_ilocked(struct binder_work *work, } /** - * binder_enqueue_work() - Add an item to the work list - * @proc: binder_proc associated with list + * binder_enqueue_deferred_thread_work_ilocked() - Add deferred thread work + * @thread: thread to queue work to * @work: struct binder_work to add to list - * @target_list: list to add work to * - * Adds the work to the specified list. Asserts that work - * is not already on a list. + * Adds the work to the todo list of the thread. Doesn't set the process_todo + * flag, which means that (if it wasn't already set) the thread will go to + * sleep without handling this work when it calls read. + * + * Requires the proc->inner_lock to be held. */ static void -binder_enqueue_work(struct binder_proc *proc, - struct binder_work *work, - struct list_head *target_list) +binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) { - binder_inner_proc_lock(proc); - binder_enqueue_work_ilocked(work, target_list); - binder_inner_proc_unlock(proc); + binder_enqueue_work_ilocked(work, &thread->todo); +} + +/** + * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + binder_enqueue_work_ilocked(work, &thread->todo); + thread->process_todo = true; +} + +/** + * binder_enqueue_thread_work() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + */ +static void +binder_enqueue_thread_work(struct binder_thread *thread, + struct binder_work *work) +{ + binder_inner_proc_lock(thread->proc); + binder_enqueue_thread_work_ilocked(thread, work); + binder_inner_proc_unlock(thread->proc); } static void @@ -964,7 +1014,7 @@ err: static bool binder_has_work_ilocked(struct binder_thread *thread, bool do_proc_work) { - return !binder_worklist_empty_ilocked(&thread->todo) || + return thread->process_todo || thread->looper_need_return || (do_proc_work && !binder_worklist_empty_ilocked(&thread->proc->todo)); @@ -1318,6 +1368,7 @@ static struct binder_node *binder_init_node_ilocked( node->min_priority = to_kernel_prio(node->sched_policy, priority); node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT); + node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX); spin_lock_init(&node->lock); INIT_LIST_HEAD(&node->work.entry); INIT_LIST_HEAD(&node->async_todo); @@ -1381,6 +1432,17 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->local_strong_refs++; if (!node->has_strong_ref && target_list) { binder_dequeue_work_ilocked(&node->work); + /* + * Note: this function is the only place where we queue + * directly to a thread->todo without using the + * corresponding binder_enqueue_thread_work() helper + * functions; in this case it's ok to not set the + * process_todo flag, since we know this node work will + * always be followed by other work that starts queue + * processing: in case of synchronous transactions, a + * BR_REPLY or BR_ERROR; in case of oneway + * transactions, a BR_TRANSACTION_COMPLETE. + */ binder_enqueue_work_ilocked(&node->work, target_list); } } else { @@ -1392,6 +1454,9 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->debug_id); return -EINVAL; } + /* + * See comment above + */ binder_enqueue_work_ilocked(&node->work, target_list); } } @@ -2081,9 +2146,9 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction_ilocked(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - binder_enqueue_work_ilocked( - &target_thread->reply_error.work, - &target_thread->todo); + binder_enqueue_thread_work_ilocked( + target_thread, + &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { /* @@ -2728,11 +2793,10 @@ static bool binder_proc_transaction(struct binder_transaction *t, struct binder_proc *proc, struct binder_thread *thread) { - struct list_head *target_list = NULL; struct binder_node *node = t->buffer->target_node; struct binder_priority node_prio; bool oneway = !!(t->flags & TF_ONE_WAY); - bool wakeup = true; + bool pending_async = false; BUG_ON(!node); binder_node_lock(node); @@ -2742,8 +2806,7 @@ static bool binder_proc_transaction(struct binder_transaction *t, if (oneway) { BUG_ON(thread); if (node->has_async_transaction) { - target_list = &node->async_todo; - wakeup = false; + pending_async = true; } else { node->has_async_transaction = true; } @@ -2757,22 +2820,20 @@ static bool binder_proc_transaction(struct binder_transaction *t, return false; } - if (!thread && !target_list) + if (!thread && !pending_async) thread = binder_select_thread_ilocked(proc); if (thread) { - target_list = &thread->todo; binder_transaction_priority(thread->task, t, node_prio, node->inherit_rt); - } else if (!target_list) { - target_list = &proc->todo; + binder_enqueue_thread_work_ilocked(thread, &t->work); + } else if (!pending_async) { + binder_enqueue_work_ilocked(&t->work, &proc->todo); } else { - BUG_ON(target_list != &node->async_todo); + binder_enqueue_work_ilocked(&t->work, &node->async_todo); } - binder_enqueue_work_ilocked(&t->work, target_list); - - if (wakeup) + if (!pending_async) binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); binder_inner_proc_unlock(proc); @@ -2846,6 +2907,8 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; int t_debug_id = atomic_inc_return(&binder_last_id); + char *secctx = NULL; + u32 secctx_sz = 0; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3069,6 +3132,20 @@ static void binder_transaction(struct binder_proc *proc, t->priority = target_proc->default_priority; } + if (target_node && target_node->txn_security_ctx) { + u32 secid; + + security_task_getsecid(proc->tsk, &secid); + ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); + if (ret) { + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_get_secctx_failed; + } + extra_buffers_size += ALIGN(secctx_sz, sizeof(u64)); + } + trace_binder_transaction(reply, t, target_node); t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, @@ -3085,6 +3162,19 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = NULL; goto err_binder_alloc_buf_failed; } + if (secctx) { + size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) + + ALIGN(tr->offsets_size, sizeof(void *)) + + ALIGN(extra_buffers_size, sizeof(void *)) - + ALIGN(secctx_sz, sizeof(u64)); + char *kptr = t->buffer->data + buf_offset; + + t->security_ctx = (uintptr_t)kptr + + binder_alloc_get_user_buffer_offset(&target_proc->alloc); + memcpy(kptr, secctx, secctx_sz); + security_release_secctx(secctx, secctx_sz); + secctx = NULL; + } t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; @@ -3281,10 +3371,10 @@ static void binder_transaction(struct binder_proc *proc, } } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - binder_enqueue_work(proc, tcomplete, &thread->todo); t->work.type = BINDER_WORK_TRANSACTION; if (reply) { + binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); if (target_thread->is_dead) { binder_inner_proc_unlock(target_proc); @@ -3292,7 +3382,7 @@ static void binder_transaction(struct binder_proc *proc, } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); - binder_enqueue_work_ilocked(&t->work, &target_thread->todo); + binder_enqueue_thread_work_ilocked(target_thread, &t->work); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); binder_restore_priority(current, in_reply_to->saved_priority); @@ -3300,6 +3390,14 @@ static void binder_transaction(struct binder_proc *proc, } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); binder_inner_proc_lock(proc); + /* + * Defer the TRANSACTION_COMPLETE, so we don't return to + * userspace immediately; this allows the target process to + * immediately start processing this transaction, reducing + * latency. We will then return the TRANSACTION_COMPLETE when + * the target replies (or there is an error). + */ + binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; @@ -3313,6 +3411,7 @@ static void binder_transaction(struct binder_proc *proc, } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); + binder_enqueue_thread_work(thread, tcomplete); if (!binder_proc_transaction(t, target_proc, NULL)) goto err_dead_proc_or_thread; } @@ -3346,6 +3445,9 @@ err_copy_data_failed: t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: + if (secctx) + security_release_secctx(secctx, secctx_sz); +err_get_secctx_failed: kfree(tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); err_alloc_tcomplete_failed: @@ -3392,15 +3494,11 @@ err_invalid_target_handle: if (in_reply_to) { binder_restore_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); binder_send_failed_reply(in_reply_to, return_error); } else { thread->return_error.cmd = return_error; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); } } @@ -3708,10 +3806,9 @@ static int binder_thread_write(struct binder_proc *proc, WARN_ON(thread->return_error.cmd != BR_OK); thread->return_error.cmd = BR_ERROR; - binder_enqueue_work( - thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work( + thread, + &thread->return_error.work); binder_debug( BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", @@ -3791,9 +3888,9 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, - &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, + &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3848,8 +3945,8 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3997,11 +4094,13 @@ retry: while (1) { uint32_t cmd; - struct binder_transaction_data tr; + struct binder_transaction_data_secctx tr; + struct binder_transaction_data *trd = &tr.transaction_data; struct binder_work *w = NULL; struct list_head *list = NULL; struct binder_transaction *t = NULL; struct binder_thread *t_from; + size_t trsize = sizeof(*trd); binder_inner_proc_lock(proc); if (!binder_worklist_empty_ilocked(&thread->todo)) @@ -4023,6 +4122,8 @@ retry: break; } w = binder_dequeue_work_head_ilocked(list); + if (binder_worklist_empty_ilocked(&thread->todo)) + thread->process_todo = false; switch (w->type) { case BINDER_WORK_TRANSACTION: { @@ -4195,41 +4296,47 @@ retry: struct binder_node *target_node = t->buffer->target_node; struct binder_priority node_prio; - tr.target.ptr = target_node->ptr; - tr.cookie = target_node->cookie; + trd->target.ptr = target_node->ptr; + trd->cookie = target_node->cookie; node_prio.sched_policy = target_node->sched_policy; node_prio.prio = target_node->min_priority; binder_transaction_priority(current, t, node_prio, target_node->inherit_rt); cmd = BR_TRANSACTION; } else { - tr.target.ptr = 0; - tr.cookie = 0; + trd->target.ptr = 0; + trd->cookie = 0; cmd = BR_REPLY; } - tr.code = t->code; - tr.flags = t->flags; - tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid); + trd->code = t->code; + trd->flags = t->flags; + trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid); t_from = binder_get_txn_from(t); if (t_from) { struct task_struct *sender = t_from->proc->tsk; - tr.sender_pid = task_tgid_nr_ns(sender, - task_active_pid_ns(current)); + trd->sender_pid = + task_tgid_nr_ns(sender, + task_active_pid_ns(current)); } else { - tr.sender_pid = 0; + trd->sender_pid = 0; } - tr.data_size = t->buffer->data_size; - tr.offsets_size = t->buffer->offsets_size; - tr.data.ptr.buffer = (binder_uintptr_t) + trd->data_size = t->buffer->data_size; + trd->offsets_size = t->buffer->offsets_size; + trd->data.ptr.buffer = (binder_uintptr_t) ((uintptr_t)t->buffer->data + binder_alloc_get_user_buffer_offset(&proc->alloc)); - tr.data.ptr.offsets = tr.data.ptr.buffer + + trd->data.ptr.offsets = trd->data.ptr.buffer + ALIGN(t->buffer->data_size, sizeof(void *)); + tr.secctx = t->security_ctx; + if (t->security_ctx) { + cmd = BR_TRANSACTION_SEC_CTX; + trsize = sizeof(tr); + } if (put_user(cmd, (uint32_t __user *)ptr)) { if (t_from) binder_thread_dec_tmpref(t_from); @@ -4240,7 +4347,7 @@ retry: return -EFAULT; } ptr += sizeof(uint32_t); - if (copy_to_user(ptr, &tr, sizeof(tr))) { + if (copy_to_user(ptr, &tr, trsize)) { if (t_from) binder_thread_dec_tmpref(t_from); @@ -4249,7 +4356,7 @@ retry: return -EFAULT; } - ptr += sizeof(tr); + ptr += trsize; trace_binder_transaction_received(t); binder_stat_br(proc, thread, cmd); @@ -4257,16 +4364,18 @@ retry: "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %016llx-%016llx\n", proc->pid, thread->pid, (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" : - "BR_REPLY", + (cmd == BR_TRANSACTION_SEC_CTX) ? + "BR_TRANSACTION_SEC_CTX" : "BR_REPLY", t->debug_id, t_from ? t_from->proc->pid : 0, t_from ? t_from->pid : 0, cmd, t->buffer->data_size, t->buffer->offsets_size, - (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets); + (u64)trd->data.ptr.buffer, + (u64)trd->data.ptr.offsets); if (t_from) binder_thread_dec_tmpref(t_from); t->buffer->allow_user_free = 1; - if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) { + if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) { binder_inner_proc_lock(thread->proc); t->to_parent = thread->transaction_stack; t->to_thread = thread; @@ -4611,7 +4720,8 @@ out: return ret; } -static int binder_ioctl_set_ctx_mgr(struct file *filp) +static int binder_ioctl_set_ctx_mgr(struct file *filp, + struct flat_binder_object *fbo) { int ret = 0; struct binder_proc *proc = filp->private_data; @@ -4640,7 +4750,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) } else { context->binder_context_mgr_uid = curr_euid; } - new_node = binder_new_node(proc, NULL); + new_node = binder_new_node(proc, fbo); if (!new_node) { ret = -ENOMEM; goto out; @@ -4762,8 +4872,20 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) binder_inner_proc_unlock(proc); break; } + case BINDER_SET_CONTEXT_MGR_EXT: { + struct flat_binder_object fbo; + + if (copy_from_user(&fbo, ubuf, sizeof(fbo))) { + ret = -EINVAL; + goto err; + } + ret = binder_ioctl_set_ctx_mgr(filp, &fbo); + if (ret) + goto err; + break; + } case BINDER_SET_CONTEXT_MGR: - ret = binder_ioctl_set_ctx_mgr(filp); + ret = binder_ioctl_set_ctx_mgr(filp, NULL); if (ret) goto err; break; @@ -5413,6 +5535,9 @@ static void print_binder_proc(struct seq_file *m, for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); + if (!print_all && !node->has_async_transaction) + continue; + /* * take a temporary reference on the node so it * survives and isn't removed from the tree diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ba514fa733de..d543172b20b3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4297,6 +4297,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 8804127b108c..21b80f5ee092 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -875,7 +875,9 @@ static int sata_rcar_probe(struct platform_device *pdev) int ret = 0; irq = platform_get_irq(pdev, 0); - if (irq <= 0) + if (irq < 0) + return irq; + if (!irq) return -EINVAL; priv = devm_kzalloc(&pdev->dev, sizeof(struct sata_rcar_priv), diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 0f5cb37636bc..010581e8bee0 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -717,7 +717,7 @@ static int he_init_cs_block_rcm(struct he_dev *he_dev) instead of '/ 512', use '>> 9' to prevent a call to divdu3 on x86 platforms */ - rate_cps = (unsigned long long) (1 << exp) * (man + 512) >> 9; + rate_cps = (unsigned long long) (1UL << exp) * (man + 512) >> 9; if (rate_cps < 10) rate_cps = 10; /* 2.2.1 minimum payload rate is 10 cps */ diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 0346e46e2871..ecca4ae248e0 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -33,6 +33,9 @@ static struct kset *system_kset; #define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr) +#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); @@ -198,7 +201,7 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(unbind); +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store); /* * Manually attach a device to a driver. @@ -234,7 +237,7 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_WO(bind); +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, S_IWUSR, NULL, bind_store); static ssize_t show_drivers_autoprobe(struct bus_type *bus, char *buf) { diff --git a/drivers/base/core.c b/drivers/base/core.c index 049ccc070ce5..cb5718d2669e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -862,6 +862,8 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) return; mutex_lock(&gdp_mutex); + if (!kobject_has_children(glue_dir)) + kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7af116e12e53..37658ff761ed 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -114,7 +114,6 @@ void wakeup_source_drop(struct wakeup_source *ws) if (!ws) return; - del_timer_sync(&ws->timer); __pm_relax(ws); } EXPORT_SYMBOL_GPL(wakeup_source_drop); @@ -202,6 +201,13 @@ void wakeup_source_remove(struct wakeup_source *ws) list_del_rcu(&ws->entry); spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); + + del_timer_sync(&ws->timer); + /* + * Clear timer.function to make wakeup_source_not_registered() treat + * this wakeup source as not registered. + */ + ws->timer.function = NULL; } EXPORT_SYMBOL_GPL(wakeup_source_remove); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e80cbefbc2b5..27e1abcf5710 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -632,14 +632,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - int timeo; - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; - rcu_read_unlock(); - schedule_timeout_interruptible(timeo); - if (try < max_tries) + if (try < max_tries) { + int timeo; try = max_tries - 1; + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } continue; } if (rv < SS_SUCCESS) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4b5680ac6ad..b1ee358edd3b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3126,7 +3126,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; - int hg, rule_nr, rr_conflict, tentative; + int hg, rule_nr, rr_conflict, tentative, always_asbp; mydisk = device->state.disk; if (mydisk == D_NEGOTIATING) @@ -3168,8 +3168,12 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, rcu_read_lock(); nc = rcu_dereference(peer_device->connection->net_conf); + always_asbp = nc->always_asbp; + rr_conflict = nc->rr_conflict; + tentative = nc->tentative; + rcu_read_unlock(); - if (hg == 100 || (hg == -100 && nc->always_asbp)) { + if (hg == 100 || (hg == -100 && always_asbp)) { int pcount = (device->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -3208,9 +3212,6 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } - rr_conflict = nc->rr_conflict; - tentative = nc->tentative; - rcu_read_unlock(); if (hg == -100) { /* FIXME this log message is not correct if we end up here @@ -3889,7 +3890,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info kfree(device->p_uuid); device->p_uuid = p_uuid; - if (device->state.conn < C_CONNECTED && + if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && device->state.disk < D_INCONSISTENT && device->state.role == R_PRIMARY && (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7eac581912f0..91f19dfc388d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -82,6 +82,7 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_index_mutex); +static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; static int part_shift; @@ -1044,7 +1045,7 @@ static int loop_clr_fd(struct loop_device *lo) */ if (atomic_read(&lo->lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1094,12 +1095,12 @@ static int loop_clr_fd(struct loop_device *lo) if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); /* - * Need not hold lo_ctl_mutex to fput backing file. - * Calling fput holding lo_ctl_mutex triggers a circular + * Need not hold loop_ctl_mutex to fput backing file. + * Calling fput holding loop_ctl_mutex triggers a circular * lock dependency possibility warning as fput can take - * bd_mutex which is usually taken before lo_ctl_mutex. + * bd_mutex which is usually taken before loop_ctl_mutex. */ fput(filp); return 0; @@ -1121,6 +1122,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1148,11 +1155,20 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto exit; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) + lo->lo_sizelimit != info->lo_sizelimit) { + /* kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto exit; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; } + } loop_config_discard(lo); @@ -1358,20 +1374,37 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1380,7 +1413,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock_nested(&lo->lo_ctl_mutex, 1); + mutex_lock_nested(&loop_ctl_mutex, 1); switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); @@ -1389,7 +1422,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, err = loop_change_fd(lo, bdev, arg); break; case LOOP_CLR_FD: - /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ + /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ err = loop_clr_fd(lo); if (!err) goto out_unlocked; @@ -1430,7 +1463,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); out_unlocked: return err; @@ -1563,16 +1596,16 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_set_status_compat( lo, (const struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; case LOOP_GET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_get_status_compat( lo, (struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1617,7 +1650,7 @@ static void __lo_release(struct loop_device *lo) if (atomic_dec_return(&lo->lo_refcnt)) return; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1634,7 +1667,7 @@ static void __lo_release(struct loop_device *lo) loop_flush(lo); } - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); } static void lo_release(struct gendisk *disk, fmode_t mode) @@ -1680,10 +1713,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) struct loop_device *lo = ptr; struct loop_func_table *xfer = data; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_encryption == xfer) loop_release_xfer(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1846,7 +1879,6 @@ static int loop_add(struct loop_device **l, int i) if (!part_shift) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; - mutex_init(&lo->lo_ctl_mutex); atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); @@ -1959,19 +1991,19 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_state != Lo_unbound) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); break; } lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 60f0fd2c0c65..a923e74495ce 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -55,7 +55,6 @@ struct loop_device { spinlock_t lo_lock; int lo_state; - struct mutex lo_ctl_mutex; struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 4b911ed96ea3..31219fb9e7f4 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -40,6 +40,8 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define WAITING_FOR_GEN_CMD 0x04 #define WAITING_FOR_ANY -1 +#define VDC_MAX_RETRIES 10 + static struct workqueue_struct *sunvdc_wq; struct vdc_req_entry { @@ -419,6 +421,7 @@ static int __vdc_tx_trigger(struct vdc_port *port) .end_idx = dr->prod, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -431,6 +434,8 @@ static int __vdc_tx_trigger(struct vdc_port *port) udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VDC_MAX_RETRIES) + break; } while (err == -EAGAIN); if (err == -ENOTCONN) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c264f2d284a7..2e0a9e2531cb 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1027,7 +1027,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode) struct swim3 __iomem *sw = fs->swim3; mutex_lock(&swim3_mutex); - if (fs->ref_count > 0 && --fs->ref_count == 0) { + if (fs->ref_count > 0) + --fs->ref_count; + else if (fs->ref_count == -1) + fs->ref_count = 0; + if (fs->ref_count == 0) { swim3_action(fs, MOTOR_OFF); out_8(&sw->control_bic, 0xff); swim3_select(fs, RELAX); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d203940203b6..aee23092f50e 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -265,6 +265,7 @@ /* #define ERRLOGMASK (CD_WARNING|CD_OPEN|CD_COUNT_TRACKS|CD_CLOSE) */ /* #define ERRLOGMASK (CD_WARNING|CD_REG_UNREG|CD_DO_IOCTL|CD_OPEN|CD_CLOSE|CD_COUNT_TRACKS) */ +#include #include #include #include @@ -3677,9 +3678,9 @@ static struct ctl_table_header *cdrom_sysctl_header; static void cdrom_sysctl_register(void) { - static int initialized; + static atomic_t initialized = ATOMIC_INIT(0); - if (initialized == 1) + if (!atomic_add_unless(&initialized, 1, 1)) return; cdrom_sysctl_header = register_sysctl_table(cdrom_root_table); @@ -3690,8 +3691,6 @@ static void cdrom_sysctl_register(void) cdrom_sysctl_settings.debug = debug; cdrom_sysctl_settings.lock = lockdoor; cdrom_sysctl_settings.check = check_media_type; - - initialized = 1; } static void cdrom_sysctl_unregister(void) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index e2808fefbb78..1852d19d0d7b 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -882,6 +882,7 @@ static void __exit exit_gdrom(void) platform_device_unregister(pd); platform_driver_unregister(&gdrom_driver); kfree(gd.toc); + kfree(gd.cd_info); } module_init(init_gdrom); diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 3143db57ce44..2bc741bea8f3 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -389,7 +389,7 @@ config XILINX_HWICAP config R3964 tristate "Siemens R3964 line discipline" - depends on TTY + depends on TTY && BROKEN ---help--- This driver allows synchronous communication with devices using the Siemens R3964 packet protocol. Unless you are dealing with special diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 14790304b84b..9fcd51095d13 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -386,7 +387,11 @@ static ssize_t ac_write(struct file *file, const char __user *buf, size_t count, TicCard = st_loc.tic_des_from_pc; /* tic number to send */ IndexCard = NumCard - 1; - if((NumCard < 1) || (NumCard > MAX_BOARD) || !apbs[IndexCard].RamIO) + if (IndexCard >= MAX_BOARD) + return -EINVAL; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (!apbs[IndexCard].RamIO) return -EINVAL; #ifdef DEBUG @@ -697,6 +702,7 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) unsigned char IndexCard; void __iomem *pmem; int ret = 0; + static int warncount = 10; volatile unsigned char byte_reset_it; struct st_ram_io *adgl; void __user *argp = (void __user *)arg; @@ -711,16 +717,12 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&ac_mutex); IndexCard = adgl->num_card-1; - if(cmd != 6 && ((IndexCard >= MAX_BOARD) || !apbs[IndexCard].RamIO)) { - static int warncount = 10; - if (warncount) { - printk( KERN_WARNING "APPLICOM driver IOCTL, bad board number %d\n",(int)IndexCard+1); - warncount--; - } - kfree(adgl); - mutex_unlock(&ac_mutex); - return -EINVAL; - } + if (cmd != 6 && IndexCard >= MAX_BOARD) + goto err; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (cmd != 6 && !apbs[IndexCard].RamIO) + goto err; switch (cmd) { @@ -838,5 +840,16 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) kfree(adgl); mutex_unlock(&ac_mutex); return 0; + +err: + if (warncount) { + pr_warn("APPLICOM driver IOCTL, bad board number %d\n", + (int)IndexCard + 1); + warncount--; + } + kfree(adgl); + mutex_unlock(&ac_mutex); + return -EINVAL; + } diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 240b6cf1d97c..72e073895ed9 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -376,7 +376,7 @@ static __init int hpet_mmap_enable(char *str) pr_info("HPET mmap %s\n", hpet_mmap_enabled ? "enabled" : "disabled"); return 1; } -__setup("hpet_mmap", hpet_mmap_enable); +__setup("hpet_mmap=", hpet_mmap_enable); static int hpet_mmap(struct file *file, struct vm_area_struct *vma) { diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 3fa2f8a009b3..1c5c4314c6b5 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -73,7 +73,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (!vi->busy) { vi->busy = true; - init_completion(&vi->have_data); + reinit_completion(&vi->have_data); register_buffer(vi, buf, size); } diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 7a2e23d6bfdd..b2da2382d544 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -637,8 +637,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, /* Remove the multi-part read marker. */ len -= 2; + data += 2; for (i = 0; i < len; i++) - ssif_info->data[i] = data[i+2]; + ssif_info->data[i] = data[i]; ssif_info->multi_len = len; ssif_info->multi_pos = 1; @@ -666,8 +667,19 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } blocknum = data[0]; + len--; + data++; - if (ssif_info->multi_len + len - 1 > IPMI_MAX_MSG_LENGTH) { + if (blocknum != 0xff && len != 31) { + /* All blocks but the last must have 31 data bytes. */ + result = -EIO; + if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) + pr_info("Received middle message <31\n"); + + goto continue_op; + } + + if (ssif_info->multi_len + len > IPMI_MAX_MSG_LENGTH) { /* Received message too big, abort the operation. */ result = -E2BIG; if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) @@ -676,16 +688,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, goto continue_op; } - /* Remove the blocknum from the data. */ - len--; for (i = 0; i < len; i++) - ssif_info->data[i + ssif_info->multi_len] = data[i + 1]; + ssif_info->data[i + ssif_info->multi_len] = data[i]; ssif_info->multi_len += len; if (blocknum == 0xff) { /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if (blocknum + 1 != ssif_info->multi_pos) { + } else if (blocknum != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, @@ -713,6 +723,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } } + continue_op: if (result < 0) { ssif_inc_stat(ssif_info, receive_errors); } else { @@ -720,8 +731,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_inc_stat(ssif_info, received_message_parts); } - - continue_op: if (ssif_info->ssif_debug & SSIF_DEBUG_STATE) pr_info(PFX "DONE 1: state = %d, result=%d.\n", ssif_info->ssif_state, result); diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index 164544afd680..618f3df6c3b9 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "smapi.h" #include "mwavedd.h" #include "3780i.h" @@ -289,6 +290,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" " ipcnum %x entry usIntCount %x\n", @@ -317,6 +320,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" " ipcnum %x, usIntCount %x\n", @@ -383,6 +388,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); mutex_lock(&mwave_mutex); if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { pDrvData->IPCs[ipcnum].bIsEnabled = FALSE; diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 35308dfff754..8226e3b6dc1f 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -109,19 +109,29 @@ static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) struct crb_priv *priv = chip->vendor.priv; unsigned int expected; - /* sanity check */ - if (count < 6) + /* A sanity check that the upper layer wants to get at least the header + * as that is the minimum size for any TPM response. + */ + if (count < TPM_HEADER_SIZE) return -EIO; + /* If this bit is set, according to the spec, the TPM is in + * unrecoverable condition. + */ if (le32_to_cpu(ioread32(&priv->cca->sts)) & CRB_CA_STS_ERROR) return -EIO; - memcpy_fromio(buf, priv->rsp, 6); - expected = be32_to_cpup((__be32 *) &buf[2]); - if (expected > count || expected < 6) + /* Read the first 8 bytes in order to get the length of the response. + * We read exactly a quad word in order to make sure that the remaining + * reads will be aligned. + */ + memcpy_fromio(buf, priv->rsp, 8); + + expected = be32_to_cpup((__be32 *)&buf[2]); + if (expected > count || expected < TPM_HEADER_SIZE) return -EIO; - memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); + memcpy_fromio(&buf[8], &priv->rsp[8], expected - 8); return expected; } diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index dd8f0eb3170a..73f7e0f7e34a 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -65,7 +65,15 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) dev_dbg(&chip->dev, "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, (int)min_t(size_t, 64, len), buf, len, status); - return status; + + if (status < 0) + return status; + + /* The upper layer does not support incomplete sends. */ + if (status != len) + return -E2BIG; + + return 0; } static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index a0df83e6b84b..46c05c9a9354 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -239,8 +239,12 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) * lvds1_gate and lvds2_gate are pseudo-gates. Both can be * independently configured as clock inputs or outputs. We treat * the "output_enable" bit as a gate, even though it's really just - * enabling clock output. + * enabling clock output. Initially the gate bits are cleared, as + * otherwise the exclusive configuration gets locked in the setup done + * by software running before the clock driver, with no way to change + * it. */ + writel(readl(base + 0x160) & ~0x3c00, base + 0x160); clk[IMX6QDL_CLK_LVDS1_GATE] = imx_clk_gate_exclusive("lvds1_gate", "lvds1_sel", base + 0x160, 10, BIT(12)); clk[IMX6QDL_CLK_LVDS2_GATE] = imx_clk_gate_exclusive("lvds2_gate", "lvds2_sel", base + 0x160, 11, BIT(13)); diff --git a/drivers/clk/imx/clk-imx6sl.c b/drivers/clk/imx/clk-imx6sl.c index 1be6230a07af..8b6306dc5fc6 100644 --- a/drivers/clk/imx/clk-imx6sl.c +++ b/drivers/clk/imx/clk-imx6sl.c @@ -17,6 +17,8 @@ #include "clk.h" +#define CCDR 0x4 +#define BM_CCM_CCDR_MMDC_CH0_MASK (1 << 17) #define CCSR 0xc #define BM_CCSR_PLL1_SW_CLK_SEL (1 << 2) #define CACRR 0x10 @@ -414,6 +416,10 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node) clks[IMX6SL_CLK_USDHC3] = imx_clk_gate2("usdhc3", "usdhc3_podf", base + 0x80, 6); clks[IMX6SL_CLK_USDHC4] = imx_clk_gate2("usdhc4", "usdhc4_podf", base + 0x80, 8); + /* Ensure the MMDC CH0 handshake is bypassed */ + writel_relaxed(readl_relaxed(base + CCDR) | + BM_CCM_CCDR_MMDC_CH0_MASK, base + CCDR); + imx_check_clocks(clks, ARRAY_SIZE(clks)); clk_data.clks = clks; diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index 7cfb7b2a2ed6..8878efb80620 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -355,16 +355,16 @@ ingenic_clk_round_rate(struct clk_hw *hw, unsigned long req_rate, struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); struct ingenic_cgu *cgu = ingenic_clk->cgu; const struct ingenic_cgu_clk_info *clk_info; - long rate = *parent_rate; + unsigned int div = 1; clk_info = &cgu->clock_info[ingenic_clk->idx]; if (clk_info->type & CGU_CLK_DIV) - rate /= ingenic_clk_calc_div(clk_info, *parent_rate, req_rate); + div = ingenic_clk_calc_div(clk_info, *parent_rate, req_rate); else if (clk_info->type & CGU_CLK_FIXDIV) - rate /= clk_info->fixdiv.div; + div = clk_info->fixdiv.div; - return rate; + return DIV_ROUND_UP(*parent_rate, div); } static int @@ -384,7 +384,7 @@ ingenic_clk_set_rate(struct clk_hw *hw, unsigned long req_rate, if (clk_info->type & CGU_CLK_DIV) { div = ingenic_clk_calc_div(clk_info, parent_rate, req_rate); - rate = parent_rate / div; + rate = DIV_ROUND_UP(parent_rate, div); if (rate != req_rate) return -EINVAL; diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c index 61893fe73251..18b6c9b55b95 100644 --- a/drivers/clk/mmp/clk.c +++ b/drivers/clk/mmp/clk.c @@ -182,7 +182,7 @@ void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id, pr_err("CLK %d has invalid pointer %p\n", id, clk); return; } - if (id > unit->nr_clks) { + if (id >= unit->nr_clks) { pr_err("CLK %d is invalid\n", id); return; } diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 47f8aafe3344..d65a6036d610 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -379,6 +379,13 @@ static void exynos4_mct_tick_start(unsigned long cycles, exynos4_mct_write(tmp, mevt->base + MCT_L_TCON_OFFSET); } +static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) +{ + /* Clear the MCT tick interrupt */ + if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) + exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); +} + static int exynos4_tick_set_next_event(unsigned long cycles, struct clock_event_device *evt) { @@ -395,6 +402,7 @@ static int set_state_shutdown(struct clock_event_device *evt) mevt = container_of(evt, struct mct_clock_event_device, evt); exynos4_mct_tick_stop(mevt); + exynos4_mct_tick_clear(mevt); return 0; } @@ -411,8 +419,11 @@ static int set_state_periodic(struct clock_event_device *evt) return 0; } -static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) +static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) { + struct mct_clock_event_device *mevt = dev_id; + struct clock_event_device *evt = &mevt->evt; + /* * This is for supporting oneshot mode. * Mct would generate interrupt periodically @@ -421,16 +432,6 @@ static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) if (!clockevent_state_periodic(&mevt->evt)) exynos4_mct_tick_stop(mevt); - /* Clear the MCT tick interrupt */ - if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) - exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); -} - -static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) -{ - struct mct_clock_event_device *mevt = dev_id; - struct clock_event_device *evt = &mevt->evt; - exynos4_mct_tick_clear(mevt); evt->event_handler(evt); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 875f22f17745..086e8cc4fceb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -570,13 +570,13 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); * SYSFS INTERFACE * *********************************************************************/ static ssize_t show_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); } -static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) { int ret, enable; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 5621bb03e874..f7b340c27ff2 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -48,11 +48,11 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* Create attributes */ #define gov_sys_attr_ro(_name) \ -static struct global_attr _name##_gov_sys = \ +static struct kobj_attribute _name##_gov_sys = \ __ATTR(_name, 0444, show_##_name##_gov_sys, NULL) #define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ +static struct kobj_attribute _name##_gov_sys = \ __ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) #define gov_pol_attr_ro(_name) \ @@ -74,7 +74,7 @@ __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) /* Create show/store routines */ #define show_one(_gov, file_name) \ static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ +(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ @@ -90,7 +90,7 @@ static ssize_t show_##file_name##_gov_pol \ #define store_one(_gov, file_name) \ static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ +(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) \ { \ struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ return store_##file_name(dbs_data, buf, count); \ @@ -254,7 +254,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate) #define declare_show_sampling_rate_min(_gov) \ static ssize_t show_sampling_rate_min_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ +(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 182b31c34000..ea1b24d081e6 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -1018,7 +1018,7 @@ static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables, */ #define show_gov_pol_sys(file_name) \ static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ +(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return show_##file_name(common_tunables, buf); \ } \ @@ -1031,7 +1031,7 @@ static ssize_t show_##file_name##_gov_pol \ #define store_gov_pol_sys(file_name) \ static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, \ +(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, \ size_t count) \ { \ return store_##file_name(common_tunables, buf, count); \ @@ -1060,7 +1060,7 @@ show_store_gov_pol_sys(boostpulse_duration); show_store_gov_pol_sys(io_is_busy); #define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ +static struct kobj_attribute _name##_gov_sys = \ __ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) #define gov_pol_attr_rw(_name) \ @@ -1082,7 +1082,7 @@ gov_sys_pol_attr_rw(boost); gov_sys_pol_attr_rw(boostpulse_duration); gov_sys_pol_attr_rw(io_is_busy); -static struct global_attr boostpulse_gov_sys = +static struct kobj_attribute boostpulse_gov_sys = __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys); static struct freq_attr boostpulse_gov_pol = diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 88728d997088..15fcf2cac971 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -368,13 +368,13 @@ static void __init intel_pstate_debug_expose_params(void) /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ - (struct kobject *kobj, struct attribute *attr, char *buf) \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", limits->object); \ } static ssize_t show_turbo_pct(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total, no_turbo, turbo_pct; @@ -390,7 +390,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, } static ssize_t show_num_pstates(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total; @@ -401,7 +401,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, } static ssize_t show_no_turbo(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -414,7 +414,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return ret; } -static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -438,7 +438,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -463,7 +463,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index 096377232747..cd0333418d15 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -191,7 +191,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) return ret; } -static void __init pxa_cpufreq_init_voltages(void) +static void pxa_cpufreq_init_voltages(void) { vcc_core = regulator_get(NULL, "vcc_core"); if (IS_ERR(vcc_core)) { @@ -207,7 +207,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) return 0; } -static void __init pxa_cpufreq_init_voltages(void) { } +static void pxa_cpufreq_init_voltages(void) { } #endif static void find_freq_tables(struct cpufreq_frequency_table **freq_table, diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c index 20bcceb58ccc..8e7deb65fc32 100644 --- a/drivers/cpufreq/tegra124-cpufreq.c +++ b/drivers/cpufreq/tegra124-cpufreq.c @@ -141,6 +141,8 @@ static int tegra124_cpufreq_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); + of_node_put(np); + return 0; out_switch_to_pllx: diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c index db2ede565f1a..b44476a1b7ad 100644 --- a/drivers/cpuidle/cpuidle-big_little.c +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -167,6 +167,7 @@ static int __init bl_idle_init(void) { int ret; struct device_node *root = of_find_node_by_path("/"); + const struct of_device_id *match_id; if (!root) return -ENODEV; @@ -174,7 +175,11 @@ static int __init bl_idle_init(void) /* * Initialize the driver just for a compliant set of machines */ - if (!of_match_node(compatible_machine_match, root)) + match_id = of_match_node(compatible_machine_match, root); + + of_node_put(root); + + if (!match_id) return -ENODEV; if (!mcpm_is_available()) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 07135e009d8b..601a6c3acc7f 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -240,7 +240,13 @@ static int pseries_idle_probe(void) return -ENODEV; if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - if (lppaca_shared_proc(get_lppaca())) { + /* + * Use local_paca instead of get_lppaca() since + * preemption is not disabled, and it is not required in + * fact, since lppaca_ptr does not need to be the value + * associated to the current CPU, it can be from any CPU. + */ + if (lppaca_shared_proc(local_paca->lppaca_ptr)) { cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c index 4afca3968773..e3b8bebfdd30 100644 --- a/drivers/crypto/amcc/crypto4xx_alg.c +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -138,7 +138,8 @@ static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher, sa = (struct dynamic_sa_ctl *) ctx->sa_in; ctx->hash_final = 0; - set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV, + set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ? + SA_SAVE_IV : SA_NOT_SAVE_IV), SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE, SA_NO_HEADER_PROC, SA_HASH_ALG_NULL, SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO, diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 78d0722feacb..1e810f5f03fa 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -645,6 +645,15 @@ static u32 crypto4xx_ablkcipher_done(struct crypto4xx_device *dev, addr = dma_map_page(dev->core_dev->device, sg_page(dst), dst->offset, dst->length, DMA_FROM_DEVICE); } + + if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) { + struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + + crypto4xx_memcpy_from_le32((u32 *)req->iv, + pd_uinfo->sr_va->save_iv, + crypto_skcipher_ivsize(skcipher)); + } + crypto4xx_ret_sg_desc(dev, pd_uinfo); if (ablk_req->base.complete != NULL) ablk_req->base.complete(&ablk_req->base, 0); diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index f3307fc38e79..f2d1fea23fbf 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -2081,6 +2081,7 @@ static void init_aead_job(struct aead_request *req, if (unlikely(req->src != req->dst)) { if (!edesc->dst_nents) { dst_dma = sg_dma_address(req->dst); + out_options = 0; } else { dst_dma = edesc->sec4_sg_dma + sec4_sg_index * diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 790f7cadc1ed..efebc484e371 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -555,7 +555,7 @@ static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_src, ctx->device->dma.sg_src_len, - direction, DMA_CTRL_ACK); + DMA_MEM_TO_DEV, DMA_CTRL_ACK); break; case DMA_FROM_DEVICE: @@ -579,7 +579,7 @@ static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_dst, ctx->device->dma.sg_dst_len, - direction, + DMA_DEV_TO_MEM, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index cd4398498495..bca6b701c067 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -181,7 +181,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, __func__); desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg, ctx->device->dma.sg_len, - direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + DMA_MEM_TO_DEV, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); if (!desc) { dev_err(ctx->device->dev, "%s: dmaengine_prep_slave_sg() failed!\n", __func__); diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 82a7c89caae2..af24c5bf32d6 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1582,8 +1583,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1591,15 +1592,15 @@ static void at_xdmac_tasklet(unsigned long data) if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd; - if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); spin_lock_bh(&atchan->lock); @@ -1654,7 +1655,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1668,7 +1669,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); - if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); tasklet_schedule(&atchan->tasklet); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 6796eb1a8a4c..884aecebb249 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -563,11 +563,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -582,11 +580,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -611,12 +607,10 @@ static int dmatest_func(void *data) } if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } done->done = false; @@ -625,12 +619,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan); @@ -643,16 +635,14 @@ static int dmatest_func(void *data) dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { dmaengine_unmap_put(um); result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } dmaengine_unmap_put(um); @@ -691,6 +681,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } runtime = ktime_us_delta(ktime_get(), ktime); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 48d85f8b95fe..529b315a6683 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -286,7 +286,7 @@ static inline int imxdma_sg_next(struct imxdma_desc *d) struct scatterlist *sg = d->sg; unsigned long now; - now = min(d->len, sg_dma_len(sg)); + now = min_t(size_t, d->len, sg_dma_len(sg)); if (d->len != IMX_DMA_LENGTH_LOOP) d->len -= now; @@ -619,7 +619,7 @@ static void imxdma_tasklet(unsigned long data) { struct imxdma_channel *imxdmac = (void *)data; struct imxdma_engine *imxdma = imxdmac->imxdma; - struct imxdma_desc *desc; + struct imxdma_desc *desc, *next_desc; unsigned long flags; spin_lock_irqsave(&imxdma->lock, flags); @@ -649,10 +649,10 @@ static void imxdma_tasklet(unsigned long data) list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); if (!list_empty(&imxdmac->ld_queue)) { - desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc, - node); + next_desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); - if (imxdma_xfer_desc(desc) < 0) + if (imxdma_xfer_desc(next_desc) < 0) dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", __func__, imxdmac->channel); } diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 6682b3eec2b6..cc8fc601ed47 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -700,6 +700,8 @@ static int usb_dmac_runtime_resume(struct device *dev) #endif /* CONFIG_PM */ static const struct dev_pm_ops usb_dmac_pm = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume, NULL) }; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index c8f79dcaaee8..67f201b8dcda 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -632,7 +632,10 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node); dma_desc = sgreq->dma_desc; - dma_desc->bytes_transferred += sgreq->req_len; + /* if we dma for long enough the transfer count will wrap */ + dma_desc->bytes_transferred = + (dma_desc->bytes_transferred + sgreq->req_len) % + dma_desc->bytes_requested; /* Callback need to be call */ if (!dma_desc->cb_count) diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c index 2b2fecffb1ad..c6a7c9ddf0ac 100644 --- a/drivers/extcon/extcon-usb-gpio.c +++ b/drivers/extcon/extcon-usb-gpio.c @@ -192,6 +192,9 @@ static int usb_extcon_resume(struct device *dev) } enable_irq(info->id_irq); + if (!device_may_wakeup(dev)) + queue_delayed_work(system_power_efficient_wq, + &info->wq_detcable, 0); return ret; } diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 72791232e46b..437c8ef90643 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -513,6 +513,7 @@ static umode_t __init ibft_check_tgt_for(void *data, int type) case ISCSI_BOOT_TGT_NIC_ASSOC: case ISCSI_BOOT_TGT_CHAP_TYPE: rc = S_IRUGO; + break; case ISCSI_BOOT_TGT_NAME: if (tgt->tgt_name_len) rc = S_IRUGO; diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index b34a62a5a7e1..47de63ae20da 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -137,8 +137,10 @@ static int adnp_gpio_direction_input(struct gpio_chip *chip, unsigned offset) if (err < 0) goto out; - if (err & BIT(pos)) - err = -EACCES; + if (value & BIT(pos)) { + err = -EPERM; + goto out; + } err = 0; diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 05813fbf3daf..647dfbbc4e1c 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -25,7 +25,7 @@ static int max7301_spi_write(struct device *dev, unsigned int reg, struct spi_device *spi = to_spi_device(dev); u16 word = ((reg & 0x7F) << 8) | (val & 0xFF); - return spi_write(spi, (const u8 *)&word, sizeof(word)); + return spi_write_then_read(spi, &word, sizeof(word), NULL, 0); } /* A read from the MAX7301 means two transfers; here, one message each */ @@ -37,14 +37,8 @@ static int max7301_spi_read(struct device *dev, unsigned int reg) struct spi_device *spi = to_spi_device(dev); word = 0x8000 | (reg << 8); - ret = spi_write(spi, (const u8 *)&word, sizeof(word)); - if (ret) - return ret; - /* - * This relies on the fact, that a transfer with NULL tx_buf shifts out - * zero bytes (=NOOP for MAX7301) - */ - ret = spi_read(spi, (u8 *)&word, sizeof(word)); + ret = spi_write_then_read(spi, &word, sizeof(word), &word, + sizeof(word)); if (ret) return ret; return word & 0xff; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index e183351d047c..b6dada726c96 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -872,14 +872,16 @@ static void omap_gpio_unmask_irq(struct irq_data *d) if (trigger) omap_set_gpio_triggering(bank, offset, trigger); - /* For level-triggered GPIOs, the clearing must be done after - * the HW source is cleared, thus after the handler has run */ - if (bank->level_mask & BIT(offset)) { - omap_set_gpio_irqenable(bank, offset, 0); - omap_clear_gpio_irqstatus(bank, offset); - } - omap_set_gpio_irqenable(bank, offset, 1); + + /* + * For level-triggered GPIOs, clearing must be done after the source + * is cleared, thus after the handler has run. OMAP4 needs this done + * after enabing the interrupt to clear the wakeup status. + */ + if (bank->level_mask & BIT(offset)) + omap_clear_gpio_irqstatus(bank, offset); + raw_spin_unlock_irqrestore(&bank->lock, flags); } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 9031e60c815c..6af57f210fa3 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -227,6 +227,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret; port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -265,6 +266,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret; + /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 6ce3a53a8bcf..bec4439fc066 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -3093,7 +3093,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pages = alloc->imported.user_buf.pages; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 59cc03538f1b..701def88352b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -872,7 +872,11 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( down_read(¤t->mm->mmap_sem); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) diff --git a/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem.c b/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem.c index a105b15d641c..fffe1fc43bff 100644 --- a/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem.c @@ -2383,7 +2383,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pages = alloc->imported.user_buf.pages; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, diff --git a/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem_linux.c index 842444c9b0bd..c88ac64e1681 100644 --- a/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost_for_linux/mali_kbase_mem_linux.c @@ -1161,7 +1161,11 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( down_read(¤t->mm->mmap_sem); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 4a223e8ee404..8b2b4e79d2f8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -2178,7 +2178,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pages = alloc->imported.user_buf.pages; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index eea429a8d590..a80ee1bcec2b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1159,7 +1159,11 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( down_read(¤t->mm->mmap_sem); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c index 4824b31673d3..989449fc862a 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c @@ -2037,7 +2037,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pages = alloc->imported.user_buf.pages; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c index b6dac5586a7a..a0e074c52188 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c @@ -1189,7 +1189,11 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* We can't really store the page list because that would involve */ /* keeping the pages pinned - instead we pin/unpin around the job */ /* (as part of the external resources handling code) */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + NULL, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); #else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e40a6d8b0b92..062c23125b2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -496,9 +496,13 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + unsigned int flags = 0; enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + if (write) + flags |= FOLL_WRITE; + if (current->mm != gtt->usermm) return -EPERM; @@ -519,7 +523,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) struct page **pages = ttm->pages + pinned; r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + flags, pages, NULL); if (r < 0) goto release_pages; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index f5a4c0eb6e84..e67cfba67096 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -968,9 +968,21 @@ static int get_clock(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4; return val & 1 ? 1 : 0; } @@ -978,9 +990,21 @@ static int get_data(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5; return val & 1 ? 1 : 0; } @@ -993,7 +1017,7 @@ static void set_clock(void *i2c_priv, int clock) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((clock & 0x01) ? 0 : 1); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); if (ujcrb7 == jtemp) break; @@ -1009,7 +1033,7 @@ static void set_data(void *i2c_priv, int data) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); if (ujcrb7 == jtemp) break; diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index f1a204d253cc..ac22b8d86249 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -36,6 +36,8 @@ #include #include "drm_legacy.h" +#include + static struct drm_map_list *drm_find_matching_map(struct drm_device *dev, struct drm_local_map *map) { @@ -1332,6 +1334,7 @@ int drm_legacy_freebufs(struct drm_device *dev, void *data, idx, dma->buf_count - 1); return -EINVAL; } + idx = array_index_nospec(idx, dma->buf_count); buf = dma->buflist[idx]; if (buf->file_priv != file_priv) { DRM_ERROR("Process %d freeing buffer not owned\n", diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5a1bafb5ecbb..ff12d926eb65 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3019,6 +3019,7 @@ static int drm_dp_mst_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs msg.u.i2c_read.transactions[i].i2c_dev_id = msgs[i].addr; msg.u.i2c_read.transactions[i].num_bytes = msgs[i].len; msg.u.i2c_read.transactions[i].bytes = msgs[i].buf; + msg.u.i2c_read.transactions[i].no_stop_bit = !(msgs[i].flags & I2C_M_STOP); } msg.u.i2c_read.read_i2c_device_id = msgs[num - 1].addr; msg.u.i2c_read.num_bytes_read = msgs[num - 1].len; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 7cab34df7514..0af055690ae7 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1109,9 +1109,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, struct drm_framebuffer *fb = fb_helper->fb; int depth; - if (var->pixclock != 0 || in_dbg_master()) + if (in_dbg_master()) return -EINVAL; + if (var->pixclock != 0) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n"); + var->pixclock = 0; + } + /* Need to resize the fb object !!! */ if (var->bits_per_pixel > fb->bits_per_pixel || var->xres > fb->width || var->yres > fb->height || diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 34757168ffaa..afaa3fac3a35 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -36,6 +36,7 @@ #include #include +#include static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -712,13 +713,17 @@ long drm_ioctl(struct file *filp, if (is_driver_ioctl) { /* driver ioctl */ - if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls) + unsigned int index = nr - DRM_COMMAND_BASE; + + if (index >= dev->driver->num_ioctls) goto err_i1; - ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + index = array_index_nospec(index, dev->driver->num_ioctls); + ioctl = &dev->driver->ioctls[index]; } else { /* core ioctl */ if (nr >= DRM_CORE_IOCTL_COUNT) goto err_i1; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); ioctl = &drm_ioctls[nr]; } @@ -820,6 +825,7 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags) if (nr >= DRM_CORE_IOCTL_COUNT) return false; + nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); *flags = drm_ioctls[nr].flags; return true; diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 3b1d9252d4de..7be5b1df849c 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -729,7 +729,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode) if (mode->hsync) return mode->hsync; - if (mode->htotal < 0) + if (mode->htotal <= 0) return 0; calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 9169817dc4b9..5eb742863082 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -471,7 +471,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, goto err_free; } - ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec); + ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + g2d_userptr->vec); if (ret != npages) { DRM_ERROR("failed to get user pages from userptr.\n"); if (ret < 0) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 359fe2b8bb8a..b02113b57d51 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -581,13 +581,17 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pvec = drm_malloc_ab(npages, sizeof(struct page *)); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!obj->userptr.read_only) + flags |= FOLL_WRITE; down_read(&mm->mmap_sem); while (pinned < npages) { ret = get_user_pages(work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, - !obj->userptr.read_only, 0, + flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 7eb253bc24df..221eaea651d4 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -107,7 +107,12 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev, if (old_state->legacy_cursor_update) continue; + if (drm_crtc_vblank_get(crtc)) + continue; + kms->funcs->wait_for_crtc_commit_done(kms, crtc); + + drm_crtc_vblank_put(crtc); } } diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 9a78c48817c6..909a52b21ebe 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -103,7 +103,9 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) char *fptr = &fifo->buf[fifo->head]; int n; - wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0 || !rd->open); + if (!rd->open) + return; n = min(sz, circ_space_to_end(&rd->fifo)); memcpy(fptr, ptr, n); @@ -192,7 +194,10 @@ out: static int rd_release(struct inode *inode, struct file *file) { struct msm_rd_state *rd = inode->i_private; + rd->open = false; + wake_up_all(&rd->fifo_event); + return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 46f87d4aaf31..782fee330b4c 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1299,6 +1299,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); + break; case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d684e2b79d2b..0c380fe77382 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -557,7 +557,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) struct page **pages = ttm->pages + pinned; r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + write ? FOLL_WRITE : 0, pages, NULL); if (r < 0) goto release_pages; diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index d0cbd5ecd7f0..4459cb32d1fe 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -242,8 +242,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) ret = get_user_pages(current, current->mm, (unsigned long)xfer->mem_addr, vsg->num_pages, - (vsg->direction == DMA_FROM_DEVICE), - 0, vsg->pages, NULL); + (vsg->direction == DMA_FROM_DEVICE) ? FOLL_WRITE : 0, + vsg->pages, NULL); up_read(¤t->mm->mmap_sem); if (ret != vsg->num_pages) { diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 06496a128162..4150873d432e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) static vq_callback_t *callbacks[] = { virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack }; - static const char *names[] = { "control", "cursor" }; + static const char * const names[] = { "control", "cursor" }; struct virtio_gpu_device *vgdev; /* this will expand later */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index be3971b22a02..ed92b9ac01b2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -594,13 +594,16 @@ out_fixup: static int vmw_dma_masks(struct vmw_private *dev_priv) { struct drm_device *dev = dev_priv->dev; + int ret = 0; - if (intel_iommu_enabled && + ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); + if (dev_priv->map_mode != vmw_dma_phys && (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } - return 0; + + return ret; } #else static int vmw_dma_masks(struct vmw_private *dev_priv) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index fda8e85dd5a2..ad0dd566aded 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3663,7 +3663,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, *p_fence = NULL; } - return 0; + return ret; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index aec6e9eef489..55884cb5a0fc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -531,11 +531,9 @@ static int vmw_fb_set_par(struct fb_info *info) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }; - struct drm_display_mode *old_mode; struct drm_display_mode *mode; int ret; - old_mode = par->set_mode; mode = drm_mode_duplicate(vmw_priv->dev, &new_mode); if (!mode) { DRM_ERROR("Could not create new fb mode.\n"); @@ -546,11 +544,7 @@ static int vmw_fb_set_par(struct fb_info *info) mode->vdisplay = var->yres; vmw_guess_mode_timing(mode); - if (old_mode && drm_mode_equal(old_mode, mode)) { - drm_mode_destroy(vmw_priv->dev, mode); - mode = old_mode; - old_mode = NULL; - } else if (!vmw_kms_validate_mode_vram(vmw_priv, + if (!vmw_kms_validate_mode_vram(vmw_priv, mode->hdisplay * DIV_ROUND_UP(var->bits_per_pixel, 8), mode->vdisplay)) { @@ -613,8 +607,8 @@ static int vmw_fb_set_par(struct fb_info *info) schedule_delayed_work(&par->local_work, 0); out_unlock: - if (old_mode) - drm_mode_destroy(vmw_priv->dev, old_mode); + if (par->set_mode) + drm_mode_destroy(vmw_priv->dev, par->set_mode); par->set_mode = mode; drm_modeset_unlock_all(vmw_priv->dev); diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 5030cba4a581..df295a0ce87d 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -746,8 +746,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, - .csi0_ofs = 0x1f030000, - .csi1_ofs = 0x1f038000, + .csi0_ofs = 0x1e030000, + .csi1_ofs = 0x1e038000, .ic_ofs = 0x1e020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, @@ -762,8 +762,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, - .csi0_ofs = 0x07030000, - .csi1_ofs = 0x07038000, + .csi0_ofs = 0x06030000, + .csi1_ofs = 0x06038000, .ic_ofs = 0x06020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 6c60f4b63d21..d7179dd3c9ef 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -455,7 +456,7 @@ static char *resolv_usage_page(unsigned page, struct seq_file *f) { char *buf = NULL; if (!f) { - buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC); + buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return ERR_PTR(-ENOMEM); } @@ -659,17 +660,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - int i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; i < strlen(buf); i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -720,8 +716,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1086,8 +1081,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1107,77 +1102,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } - - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; + } + + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); + if (ret) goto out; - - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) - goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1188,7 +1163,7 @@ static unsigned int hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return POLLIN | POLLRDNORM; if (!list->hdev->debug) return POLLERR | POLLHUP; @@ -1203,7 +1178,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1254,4 +1229,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 8979f1fd5208..24a4a23bdc90 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -703,7 +703,9 @@ static int lenovo_probe_tpkbd(struct hid_device *hdev) data_pointer->led_mute.brightness_get = lenovo_led_brightness_get_tpkbd; data_pointer->led_mute.brightness_set = lenovo_led_brightness_set_tpkbd; data_pointer->led_mute.dev = dev; - led_classdev_register(dev, &data_pointer->led_mute); + ret = led_classdev_register(dev, &data_pointer->led_mute); + if (ret < 0) + goto err; data_pointer->led_micmute.name = name_micmute; data_pointer->led_micmute.brightness_get = @@ -711,7 +713,11 @@ static int lenovo_probe_tpkbd(struct hid_device *hdev) data_pointer->led_micmute.brightness_set = lenovo_led_brightness_set_tpkbd; data_pointer->led_micmute.dev = dev; - led_classdev_register(dev, &data_pointer->led_micmute); + ret = led_classdev_register(dev, &data_pointer->led_micmute); + if (ret < 0) { + led_classdev_unregister(&data_pointer->led_mute); + goto err; + } lenovo_features_set_tpkbd(hdev); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 802dcb409030..b877cce0409b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -316,6 +316,8 @@ static ssize_t out_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } @@ -329,6 +331,8 @@ static ssize_t out_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_read_index); } @@ -343,6 +347,8 @@ static ssize_t out_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.current_write_index); } @@ -357,6 +363,8 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } @@ -371,6 +379,8 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } @@ -384,6 +394,8 @@ static ssize_t in_intr_mask_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } @@ -397,6 +409,8 @@ static ssize_t in_read_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_read_index); } @@ -410,6 +424,8 @@ static ssize_t in_write_index_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.current_write_index); } @@ -424,6 +440,8 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } @@ -438,6 +456,8 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, if (!hv_dev->channel) return -ENODEV; + if (hv_dev->channel->state != CHANNEL_OPENED_STATE) + return -EINVAL; hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 9ac6e1673375..1f291b344178 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -273,7 +273,7 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, break; case INA2XX_CURRENT: /* signed register, result in mA */ - val = regval * data->current_lsb_uA; + val = (s16)regval * data->current_lsb_uA; val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 4bcd9b882948..be60bd5bab78 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -360,9 +360,11 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, struct i2c_client *client = data->client; unsigned long min, val; u8 reg; - int err = kstrtoul(buf, 10, &val); - if (err < 0) - return err; + int rv; + + rv = kstrtoul(buf, 10, &val); + if (rv < 0) + return rv; /* Save fan_min */ mutex_lock(&data->update_lock); @@ -390,8 +392,13 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, return -EINVAL; } - reg = (lm80_read_value(client, LM80_REG_FANDIV) & - ~(3 << (2 * (nr + 1)))) | (data->fan_div[nr] << (2 * (nr + 1))); + rv = lm80_read_value(client, LM80_REG_FANDIV); + if (rv < 0) { + mutex_unlock(&data->update_lock); + return rv; + } + reg = (rv & ~(3 << (2 * (nr + 1)))) + | (data->fan_div[nr] << (2 * (nr + 1))); lm80_write_value(client, LM80_REG_FANDIV, reg); /* Restore fan_min */ @@ -623,6 +630,7 @@ static int lm80_probe(struct i2c_client *client, struct device *dev = &client->dev; struct device *hwmon_dev; struct lm80_data *data; + int rv; data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL); if (!data) @@ -635,8 +643,14 @@ static int lm80_probe(struct i2c_client *client, lm80_init_client(client); /* A few vars need to be filled upon startup */ - data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1)); - data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2)); + rv = lm80_read_value(client, LM80_REG_FAN_MIN(1)); + if (rv < 0) + return rv; + data->fan[f_min][0] = rv; + rv = lm80_read_value(client, LM80_REG_FAN_MIN(2)); + if (rv < 0) + return rv; + data->fan[f_min][1] = rv; hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, lm80_groups); diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 49276bbdac3d..1bb80f992aa8 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -1691,7 +1691,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, * somewhere else in the code */ #define SENSOR_ATTR_TEMP(index) { \ - SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \ + SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 5 ? S_IWUSR : 0), \ show_temp_mode, store_temp_mode, NOT_USED, index - 1), \ SENSOR_ATTR_2(temp##index##_input, S_IRUGO, show_temp, \ NULL, TEMP_READ, index - 1), \ diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 462f0dc15757..c80cc18747cb 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -26,19 +27,14 @@ #include #include #include -#include #include #include #include #include -#include #include -#include #include -#include #include "coresight-etm4x.h" -#include "coresight-etm-perf.h" static int boot_enable; module_param_named(boot_enable, boot_enable, int, S_IRUGO); @@ -46,19 +42,20 @@ module_param_named(boot_enable, boot_enable, int, S_IRUGO); /* The number of ETMv4 currently registered */ static int etm4_count; static struct etmv4_drvdata *etmdrvdata[NR_CPUS]; -static void etm4_set_default(struct etmv4_config *config); -static void etm4_os_unlock(struct etmv4_drvdata *drvdata) +static void etm4_os_unlock(void *info) { + struct etmv4_drvdata *drvdata = (struct etmv4_drvdata *)info; + /* Writing any value to ETMOSLAR unlocks the trace registers */ writel_relaxed(0x0, drvdata->base + TRCOSLAR); - drvdata->os_unlock = true; isb(); } static bool etm4_arch_supported(u8 arch) { - switch (arch) { + /* Mask out the minor version number */ + switch (arch & 0xf0) { case ETM_ARCH_V4: break; default: @@ -67,22 +64,16 @@ static bool etm4_arch_supported(u8 arch) return true; } -static int etm4_cpu_id(struct coresight_device *csdev) -{ - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - return drvdata->cpu; -} - static int etm4_trace_id(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); unsigned long flags; int trace_id = -1; - if (!local_read(&drvdata->mode)) + if (!drvdata->enable) return drvdata->trcid; + pm_runtime_get_sync(drvdata->dev); spin_lock_irqsave(&drvdata->spinlock, flags); CS_UNLOCK(drvdata->base); @@ -91,6 +82,7 @@ static int etm4_trace_id(struct coresight_device *csdev) CS_LOCK(drvdata->base); spin_unlock_irqrestore(&drvdata->spinlock, flags); + pm_runtime_put(drvdata->dev); return trace_id; } @@ -99,7 +91,6 @@ static void etm4_enable_hw(void *info) { int i; struct etmv4_drvdata *drvdata = info; - struct etmv4_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); @@ -114,69 +105,69 @@ static void etm4_enable_hw(void *info) "timeout observed when probing at offset %#x\n", TRCSTATR); - writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR); - writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR); + writel_relaxed(drvdata->pe_sel, drvdata->base + TRCPROCSELR); + writel_relaxed(drvdata->cfg, drvdata->base + TRCCONFIGR); /* nothing specific implemented */ writel_relaxed(0x0, drvdata->base + TRCAUXCTLR); - writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R); - writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R); - writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR); - writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR); - writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR); - writel_relaxed(config->ccctlr, drvdata->base + TRCCCCTLR); - writel_relaxed(config->bb_ctrl, drvdata->base + TRCBBCTLR); + writel_relaxed(drvdata->eventctrl0, drvdata->base + TRCEVENTCTL0R); + writel_relaxed(drvdata->eventctrl1, drvdata->base + TRCEVENTCTL1R); + writel_relaxed(drvdata->stall_ctrl, drvdata->base + TRCSTALLCTLR); + writel_relaxed(drvdata->ts_ctrl, drvdata->base + TRCTSCTLR); + writel_relaxed(drvdata->syncfreq, drvdata->base + TRCSYNCPR); + writel_relaxed(drvdata->ccctlr, drvdata->base + TRCCCCTLR); + writel_relaxed(drvdata->bb_ctrl, drvdata->base + TRCBBCTLR); writel_relaxed(drvdata->trcid, drvdata->base + TRCTRACEIDR); - writel_relaxed(config->vinst_ctrl, drvdata->base + TRCVICTLR); - writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR); - writel_relaxed(config->vissctlr, + writel_relaxed(drvdata->vinst_ctrl, drvdata->base + TRCVICTLR); + writel_relaxed(drvdata->viiectlr, drvdata->base + TRCVIIECTLR); + writel_relaxed(drvdata->vissctlr, drvdata->base + TRCVISSCTLR); - writel_relaxed(config->vipcssctlr, + writel_relaxed(drvdata->vipcssctlr, drvdata->base + TRCVIPCSSCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) - writel_relaxed(config->seq_ctrl[i], + writel_relaxed(drvdata->seq_ctrl[i], drvdata->base + TRCSEQEVRn(i)); - writel_relaxed(config->seq_rst, drvdata->base + TRCSEQRSTEVR); - writel_relaxed(config->seq_state, drvdata->base + TRCSEQSTR); - writel_relaxed(config->ext_inp, drvdata->base + TRCEXTINSELR); + writel_relaxed(drvdata->seq_rst, drvdata->base + TRCSEQRSTEVR); + writel_relaxed(drvdata->seq_state, drvdata->base + TRCSEQSTR); + writel_relaxed(drvdata->ext_inp, drvdata->base + TRCEXTINSELR); for (i = 0; i < drvdata->nr_cntr; i++) { - writel_relaxed(config->cntrldvr[i], + writel_relaxed(drvdata->cntrldvr[i], drvdata->base + TRCCNTRLDVRn(i)); - writel_relaxed(config->cntr_ctrl[i], + writel_relaxed(drvdata->cntr_ctrl[i], drvdata->base + TRCCNTCTLRn(i)); - writel_relaxed(config->cntr_val[i], + writel_relaxed(drvdata->cntr_val[i], drvdata->base + TRCCNTVRn(i)); } /* Resource selector pair 0 is always implemented and reserved */ - for (i = 0; i < drvdata->nr_resource * 2; i++) - writel_relaxed(config->res_ctrl[i], + for (i = 2; i < drvdata->nr_resource * 2; i++) + writel_relaxed(drvdata->res_ctrl[i], drvdata->base + TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { - writel_relaxed(config->ss_ctrl[i], + writel_relaxed(drvdata->ss_ctrl[i], drvdata->base + TRCSSCCRn(i)); - writel_relaxed(config->ss_status[i], + writel_relaxed(drvdata->ss_status[i], drvdata->base + TRCSSCSRn(i)); - writel_relaxed(config->ss_pe_cmp[i], + writel_relaxed(drvdata->ss_pe_cmp[i], drvdata->base + TRCSSPCICRn(i)); } for (i = 0; i < drvdata->nr_addr_cmp; i++) { - writeq_relaxed(config->addr_val[i], + writeq_relaxed(drvdata->addr_val[i], drvdata->base + TRCACVRn(i)); - writeq_relaxed(config->addr_acc[i], + writeq_relaxed(drvdata->addr_acc[i], drvdata->base + TRCACATRn(i)); } for (i = 0; i < drvdata->numcidc; i++) - writeq_relaxed(config->ctxid_pid[i], + writeq_relaxed(drvdata->ctxid_pid[i], drvdata->base + TRCCIDCVRn(i)); - writel_relaxed(config->ctxid_mask0, drvdata->base + TRCCIDCCTLR0); - writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1); + writel_relaxed(drvdata->ctxid_mask0, drvdata->base + TRCCIDCCTLR0); + writel_relaxed(drvdata->ctxid_mask1, drvdata->base + TRCCIDCCTLR1); for (i = 0; i < drvdata->numvmidc; i++) - writeq_relaxed(config->vmid_val[i], + writeq_relaxed(drvdata->vmid_val[i], drvdata->base + TRCVMIDCVRn(i)); - writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0); - writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1); + writel_relaxed(drvdata->vmid_mask0, drvdata->base + TRCVMIDCCTLR0); + writel_relaxed(drvdata->vmid_mask1, drvdata->base + TRCVMIDCCTLR1); /* Enable the trace unit */ writel_relaxed(1, drvdata->base + TRCPRGCTLR); @@ -192,63 +183,12 @@ static void etm4_enable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); } -static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, - struct perf_event_attr *attr) -{ - struct etmv4_config *config = &drvdata->config; - - if (!attr) - return -EINVAL; - - /* Clear configuration from previous run */ - memset(config, 0, sizeof(struct etmv4_config)); - - if (attr->exclude_kernel) - config->mode = ETM_MODE_EXCL_KERN; - - if (attr->exclude_user) - config->mode = ETM_MODE_EXCL_USER; - - /* Always start from the default config */ - etm4_set_default(config); - - /* - * By default the tracers are configured to trace the whole address - * range. Narrow the field only if requested by user space. - */ - if (config->mode) - etm4_config_trace_mode(config); - - /* Go from generic option to ETMv4 specifics */ - if (attr->config & BIT(ETM_OPT_CYCACC)) - config->cfg |= ETMv4_MODE_CYCACC; - if (attr->config & BIT(ETM_OPT_TS)) - config->cfg |= ETMv4_MODE_TIMESTAMP; - - return 0; -} - -static int etm4_enable_perf(struct coresight_device *csdev, - struct perf_event_attr *attr) -{ - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) - return -EINVAL; - - /* Configure the tracer based on the session's specifics */ - etm4_parse_event_config(drvdata, attr); - /* And enable it */ - etm4_enable_hw(drvdata); - - return 0; -} - -static int etm4_enable_sysfs(struct coresight_device *csdev) +static int etm4_enable(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); int ret; + pm_runtime_get_sync(drvdata->dev); spin_lock(&drvdata->spinlock); /* @@ -259,46 +199,16 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) etm4_enable_hw, drvdata, 1); if (ret) goto err; - + drvdata->enable = true; drvdata->sticky_enable = true; + spin_unlock(&drvdata->spinlock); dev_info(drvdata->dev, "ETM tracing enabled\n"); return 0; - err: spin_unlock(&drvdata->spinlock); - return ret; -} - -static int etm4_enable(struct coresight_device *csdev, - struct perf_event_attr *attr, u32 mode) -{ - int ret; - u32 val; - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode); - - /* Someone is already using the tracer */ - if (val) - return -EBUSY; - - switch (mode) { - case CS_MODE_SYSFS: - ret = etm4_enable_sysfs(csdev); - break; - case CS_MODE_PERF: - ret = etm4_enable_perf(csdev, attr); - break; - default: - ret = -EINVAL; - } - - /* The tracer didn't start */ - if (ret) - local_set(&drvdata->mode, CS_MODE_DISABLED); - + pm_runtime_put(drvdata->dev); return ret; } @@ -324,18 +234,7 @@ static void etm4_disable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); } -static int etm4_disable_perf(struct coresight_device *csdev) -{ - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) - return -EINVAL; - - etm4_disable_hw(drvdata); - return 0; -} - -static void etm4_disable_sysfs(struct coresight_device *csdev) +static void etm4_disable(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -353,42 +252,17 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) * ensures that register writes occur when cpu is powered. */ smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1); + drvdata->enable = false; spin_unlock(&drvdata->spinlock); put_online_cpus(); + pm_runtime_put(drvdata->dev); + dev_info(drvdata->dev, "ETM tracing disabled\n"); } -static void etm4_disable(struct coresight_device *csdev) -{ - u32 mode; - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - /* - * For as long as the tracer isn't disabled another entity can't - * change its status. As such we can read the status here without - * fearing it will change under us. - */ - mode = local_read(&drvdata->mode); - - switch (mode) { - case CS_MODE_DISABLED: - break; - case CS_MODE_SYSFS: - etm4_disable_sysfs(csdev); - break; - case CS_MODE_PERF: - etm4_disable_perf(csdev); - break; - } - - if (mode) - local_set(&drvdata->mode, CS_MODE_DISABLED); -} - static const struct coresight_ops_source etm4_source_ops = { - .cpu_id = etm4_cpu_id, .trace_id = etm4_trace_id, .enable = etm4_enable, .disable = etm4_disable, @@ -398,6 +272,2035 @@ static const struct coresight_ops etm4_cs_ops = { .source_ops = &etm4_source_ops, }; +static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude) +{ + u8 idx = drvdata->addr_idx; + + /* + * TRCACATRn.TYPE bit[1:0]: type of comparison + * the trace unit performs + */ + if (BMVAL(drvdata->addr_acc[idx], 0, 1) == ETM_INSTR_ADDR) { + if (idx % 2 != 0) + return -EINVAL; + + /* + * We are performing instruction address comparison. Set the + * relevant bit of ViewInst Include/Exclude Control register + * for corresponding address comparator pair. + */ + if (drvdata->addr_type[idx] != ETM_ADDR_TYPE_RANGE || + drvdata->addr_type[idx + 1] != ETM_ADDR_TYPE_RANGE) + return -EINVAL; + + if (exclude == true) { + /* + * Set exclude bit and unset the include bit + * corresponding to comparator pair + */ + drvdata->viiectlr |= BIT(idx / 2 + 16); + drvdata->viiectlr &= ~BIT(idx / 2); + } else { + /* + * Set include bit and unset exclude bit + * corresponding to comparator pair + */ + drvdata->viiectlr |= BIT(idx / 2); + drvdata->viiectlr &= ~BIT(idx / 2 + 16); + } + } + return 0; +} + +static ssize_t nr_pe_cmp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_pe_cmp; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_pe_cmp); + +static ssize_t nr_addr_cmp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_addr_cmp; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_addr_cmp); + +static ssize_t nr_cntr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_cntr; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_cntr); + +static ssize_t nr_ext_inp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_ext_inp; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_ext_inp); + +static ssize_t numcidc_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->numcidc; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(numcidc); + +static ssize_t numvmidc_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->numvmidc; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(numvmidc); + +static ssize_t nrseqstate_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nrseqstate; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nrseqstate); + +static ssize_t nr_resource_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_resource; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_resource); + +static ssize_t nr_ss_cmp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_ss_cmp; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_ss_cmp); + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int i; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + if (val) + drvdata->mode = 0x0; + + /* Disable data tracing: do not trace load and store data transfers */ + drvdata->mode &= ~(ETM_MODE_LOAD | ETM_MODE_STORE); + drvdata->cfg &= ~(BIT(1) | BIT(2)); + + /* Disable data value and data address tracing */ + drvdata->mode &= ~(ETM_MODE_DATA_TRACE_ADDR | + ETM_MODE_DATA_TRACE_VAL); + drvdata->cfg &= ~(BIT(16) | BIT(17)); + + /* Disable all events tracing */ + drvdata->eventctrl0 = 0x0; + drvdata->eventctrl1 = 0x0; + + /* Disable timestamp event */ + drvdata->ts_ctrl = 0x0; + + /* Disable stalling */ + drvdata->stall_ctrl = 0x0; + + /* Reset trace synchronization period to 2^8 = 256 bytes*/ + if (drvdata->syncpr == false) + drvdata->syncfreq = 0x8; + + /* + * Enable ViewInst to trace everything with start-stop logic in + * started state. ARM recommends start-stop logic is set before + * each trace run. + */ + drvdata->vinst_ctrl |= BIT(0); + if (drvdata->nr_addr_cmp == true) { + drvdata->mode |= ETM_MODE_VIEWINST_STARTSTOP; + /* SSSTATUS, bit[9] */ + drvdata->vinst_ctrl |= BIT(9); + } + + /* No address range filtering for ViewInst */ + drvdata->viiectlr = 0x0; + + /* No start-stop filtering for ViewInst */ + drvdata->vissctlr = 0x0; + + /* Disable seq events */ + for (i = 0; i < drvdata->nrseqstate-1; i++) + drvdata->seq_ctrl[i] = 0x0; + drvdata->seq_rst = 0x0; + drvdata->seq_state = 0x0; + + /* Disable external input events */ + drvdata->ext_inp = 0x0; + + drvdata->cntr_idx = 0x0; + for (i = 0; i < drvdata->nr_cntr; i++) { + drvdata->cntrldvr[i] = 0x0; + drvdata->cntr_ctrl[i] = 0x0; + drvdata->cntr_val[i] = 0x0; + } + + /* Resource selector pair 0 is always implemented and reserved */ + drvdata->res_idx = 0x2; + for (i = 2; i < drvdata->nr_resource * 2; i++) + drvdata->res_ctrl[i] = 0x0; + + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + drvdata->ss_ctrl[i] = 0x0; + drvdata->ss_pe_cmp[i] = 0x0; + } + + drvdata->addr_idx = 0x0; + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + drvdata->addr_val[i] = 0x0; + drvdata->addr_acc[i] = 0x0; + drvdata->addr_type[i] = ETM_ADDR_TYPE_NONE; + } + + drvdata->ctxid_idx = 0x0; + for (i = 0; i < drvdata->numcidc; i++) { + drvdata->ctxid_pid[i] = 0x0; + drvdata->ctxid_vpid[i] = 0x0; + } + + drvdata->ctxid_mask0 = 0x0; + drvdata->ctxid_mask1 = 0x0; + + drvdata->vmid_idx = 0x0; + for (i = 0; i < drvdata->numvmidc; i++) + drvdata->vmid_val[i] = 0x0; + drvdata->vmid_mask0 = 0x0; + drvdata->vmid_mask1 = 0x0; + + drvdata->trcid = drvdata->cpu + 1; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_WO(reset); + +static ssize_t mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->mode; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val, mode; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + drvdata->mode = val & ETMv4_MODE_ALL; + + if (drvdata->mode & ETM_MODE_EXCLUDE) + etm4_set_mode_exclude(drvdata, true); + else + etm4_set_mode_exclude(drvdata, false); + + if (drvdata->instrp0 == true) { + /* start by clearing instruction P0 field */ + drvdata->cfg &= ~(BIT(1) | BIT(2)); + if (drvdata->mode & ETM_MODE_LOAD) + /* 0b01 Trace load instructions as P0 instructions */ + drvdata->cfg |= BIT(1); + if (drvdata->mode & ETM_MODE_STORE) + /* 0b10 Trace store instructions as P0 instructions */ + drvdata->cfg |= BIT(2); + if (drvdata->mode & ETM_MODE_LOAD_STORE) + /* + * 0b11 Trace load and store instructions + * as P0 instructions + */ + drvdata->cfg |= BIT(1) | BIT(2); + } + + /* bit[3], Branch broadcast mode */ + if ((drvdata->mode & ETM_MODE_BB) && (drvdata->trcbb == true)) + drvdata->cfg |= BIT(3); + else + drvdata->cfg &= ~BIT(3); + + /* bit[4], Cycle counting instruction trace bit */ + if ((drvdata->mode & ETMv4_MODE_CYCACC) && + (drvdata->trccci == true)) + drvdata->cfg |= BIT(4); + else + drvdata->cfg &= ~BIT(4); + + /* bit[6], Context ID tracing bit */ + if ((drvdata->mode & ETMv4_MODE_CTXID) && (drvdata->ctxid_size)) + drvdata->cfg |= BIT(6); + else + drvdata->cfg &= ~BIT(6); + + if ((drvdata->mode & ETM_MODE_VMID) && (drvdata->vmid_size)) + drvdata->cfg |= BIT(7); + else + drvdata->cfg &= ~BIT(7); + + /* bits[10:8], Conditional instruction tracing bit */ + mode = ETM_MODE_COND(drvdata->mode); + if (drvdata->trccond == true) { + drvdata->cfg &= ~(BIT(8) | BIT(9) | BIT(10)); + drvdata->cfg |= mode << 8; + } + + /* bit[11], Global timestamp tracing bit */ + if ((drvdata->mode & ETMv4_MODE_TIMESTAMP) && (drvdata->ts_size)) + drvdata->cfg |= BIT(11); + else + drvdata->cfg &= ~BIT(11); + + /* bit[12], Return stack enable bit */ + if ((drvdata->mode & ETM_MODE_RETURNSTACK) && + (drvdata->retstack == true)) + drvdata->cfg |= BIT(12); + else + drvdata->cfg &= ~BIT(12); + + /* bits[14:13], Q element enable field */ + mode = ETM_MODE_QELEM(drvdata->mode); + /* start by clearing QE bits */ + drvdata->cfg &= ~(BIT(13) | BIT(14)); + /* if supported, Q elements with instruction counts are enabled */ + if ((mode & BIT(0)) && (drvdata->q_support & BIT(0))) + drvdata->cfg |= BIT(13); + /* + * if supported, Q elements with and without instruction + * counts are enabled + */ + if ((mode & BIT(1)) && (drvdata->q_support & BIT(1))) + drvdata->cfg |= BIT(14); + + /* bit[11], AMBA Trace Bus (ATB) trigger enable bit */ + if ((drvdata->mode & ETM_MODE_ATB_TRIGGER) && + (drvdata->atbtrig == true)) + drvdata->eventctrl1 |= BIT(11); + else + drvdata->eventctrl1 &= ~BIT(11); + + /* bit[12], Low-power state behavior override bit */ + if ((drvdata->mode & ETM_MODE_LPOVERRIDE) && + (drvdata->lpoverride == true)) + drvdata->eventctrl1 |= BIT(12); + else + drvdata->eventctrl1 &= ~BIT(12); + + /* bit[8], Instruction stall bit */ + if (drvdata->mode & ETM_MODE_ISTALL_EN) + drvdata->stall_ctrl |= BIT(8); + else + drvdata->stall_ctrl &= ~BIT(8); + + /* bit[10], Prioritize instruction trace bit */ + if (drvdata->mode & ETM_MODE_INSTPRIO) + drvdata->stall_ctrl |= BIT(10); + else + drvdata->stall_ctrl &= ~BIT(10); + + /* bit[13], Trace overflow prevention bit */ + if ((drvdata->mode & ETM_MODE_NOOVERFLOW) && + (drvdata->nooverflow == true)) + drvdata->stall_ctrl |= BIT(13); + else + drvdata->stall_ctrl &= ~BIT(13); + + /* bit[9] Start/stop logic control bit */ + if (drvdata->mode & ETM_MODE_VIEWINST_STARTSTOP) + drvdata->vinst_ctrl |= BIT(9); + else + drvdata->vinst_ctrl &= ~BIT(9); + + /* bit[10], Whether a trace unit must trace a Reset exception */ + if (drvdata->mode & ETM_MODE_TRACE_RESET) + drvdata->vinst_ctrl |= BIT(10); + else + drvdata->vinst_ctrl &= ~BIT(10); + + /* bit[11], Whether a trace unit must trace a system error exception */ + if ((drvdata->mode & ETM_MODE_TRACE_ERR) && + (drvdata->trc_error == true)) + drvdata->vinst_ctrl |= BIT(11); + else + drvdata->vinst_ctrl &= ~BIT(11); + + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(mode); + +static ssize_t pe_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->pe_sel; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t pe_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + if (val > drvdata->nr_pe) { + spin_unlock(&drvdata->spinlock); + return -EINVAL; + } + + drvdata->pe_sel = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(pe); + +static ssize_t event_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->eventctrl0; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + switch (drvdata->nr_event) { + case 0x0: + /* EVENT0, bits[7:0] */ + drvdata->eventctrl0 = val & 0xFF; + break; + case 0x1: + /* EVENT1, bits[15:8] */ + drvdata->eventctrl0 = val & 0xFFFF; + break; + case 0x2: + /* EVENT2, bits[23:16] */ + drvdata->eventctrl0 = val & 0xFFFFFF; + break; + case 0x3: + /* EVENT3, bits[31:24] */ + drvdata->eventctrl0 = val; + break; + default: + break; + } + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(event); + +static ssize_t event_instren_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = BMVAL(drvdata->eventctrl1, 0, 3); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t event_instren_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + /* start by clearing all instruction event enable bits */ + drvdata->eventctrl1 &= ~(BIT(0) | BIT(1) | BIT(2) | BIT(3)); + switch (drvdata->nr_event) { + case 0x0: + /* generate Event element for event 1 */ + drvdata->eventctrl1 |= val & BIT(1); + break; + case 0x1: + /* generate Event element for event 1 and 2 */ + drvdata->eventctrl1 |= val & (BIT(0) | BIT(1)); + break; + case 0x2: + /* generate Event element for event 1, 2 and 3 */ + drvdata->eventctrl1 |= val & (BIT(0) | BIT(1) | BIT(2)); + break; + case 0x3: + /* generate Event element for all 4 events */ + drvdata->eventctrl1 |= val & 0xF; + break; + default: + break; + } + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(event_instren); + +static ssize_t event_ts_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->ts_ctrl; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t event_ts_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (!drvdata->ts_size) + return -EINVAL; + + drvdata->ts_ctrl = val & ETMv4_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(event_ts); + +static ssize_t syncfreq_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->syncfreq; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t syncfreq_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (drvdata->syncpr == true) + return -EINVAL; + + drvdata->syncfreq = val & ETMv4_SYNC_MASK; + return size; +} +static DEVICE_ATTR_RW(syncfreq); + +static ssize_t cyc_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->ccctlr; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t cyc_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val < drvdata->ccitmin) + return -EINVAL; + + drvdata->ccctlr = val & ETM_CYC_THRESHOLD_MASK; + return size; +} +static DEVICE_ATTR_RW(cyc_threshold); + +static ssize_t bb_ctrl_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->bb_ctrl; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t bb_ctrl_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (drvdata->trcbb == false) + return -EINVAL; + if (!drvdata->nr_addr_cmp) + return -EINVAL; + /* + * Bit[7:0] selects which address range comparator is used for + * branch broadcast control. + */ + if (BMVAL(val, 0, 7) > drvdata->nr_addr_cmp) + return -EINVAL; + + drvdata->bb_ctrl = val; + return size; +} +static DEVICE_ATTR_RW(bb_ctrl); + +static ssize_t event_vinst_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->vinst_ctrl & ETMv4_EVENT_MASK; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t event_vinst_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + val &= ETMv4_EVENT_MASK; + drvdata->vinst_ctrl &= ~ETMv4_EVENT_MASK; + drvdata->vinst_ctrl |= val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(event_vinst); + +static ssize_t s_exlevel_vinst_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = BMVAL(drvdata->vinst_ctrl, 16, 19); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t s_exlevel_vinst_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + /* clear all EXLEVEL_S bits (bit[18] is never implemented) */ + drvdata->vinst_ctrl &= ~(BIT(16) | BIT(17) | BIT(19)); + /* enable instruction tracing for corresponding exception level */ + val &= drvdata->s_ex_level; + drvdata->vinst_ctrl |= (val << 16); + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(s_exlevel_vinst); + +static ssize_t ns_exlevel_vinst_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* EXLEVEL_NS, bits[23:20] */ + val = BMVAL(drvdata->vinst_ctrl, 20, 23); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t ns_exlevel_vinst_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + /* clear EXLEVEL_NS bits (bit[23] is never implemented */ + drvdata->vinst_ctrl &= ~(BIT(20) | BIT(21) | BIT(22)); + /* enable instruction tracing for corresponding exception level */ + val &= drvdata->ns_ex_level; + drvdata->vinst_ctrl |= (val << 20); + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(ns_exlevel_vinst); + +static ssize_t addr_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->addr_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t addr_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->nr_addr_cmp * 2) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->addr_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_idx); + +static ssize_t addr_instdatatype_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t len; + u8 val, idx; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + val = BMVAL(drvdata->addr_acc[idx], 0, 1); + len = scnprintf(buf, PAGE_SIZE, "%s\n", + val == ETM_INSTR_ADDR ? "instr" : + (val == ETM_DATA_LOAD_ADDR ? "data_load" : + (val == ETM_DATA_STORE_ADDR ? "data_store" : + "data_load_store"))); + spin_unlock(&drvdata->spinlock); + return len; +} + +static ssize_t addr_instdatatype_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + char str[20] = ""; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (strlen(buf) >= 20) + return -EINVAL; + if (sscanf(buf, "%s", str) != 1) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!strcmp(str, "instr")) + /* TYPE, bits[1:0] */ + drvdata->addr_acc[idx] &= ~(BIT(0) | BIT(1)); + + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_instdatatype); + +static ssize_t addr_single_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + idx = drvdata->addr_idx; + spin_lock(&drvdata->spinlock); + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + val = (unsigned long)drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t addr_single_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = (u64)val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_SINGLE; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_single); + +static ssize_t addr_range_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val1, val2; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (idx % 2 != 0) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || + (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val1 = (unsigned long)drvdata->addr_val[idx]; + val2 = (unsigned long)drvdata->addr_val[idx + 1]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); +} + +static ssize_t addr_range_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val1, val2; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (sscanf(buf, "%lx %lx", &val1, &val2) != 2) + return -EINVAL; + /* lower address comparator cannot have a higher address value */ + if (val1 > val2) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (idx % 2 != 0) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || + (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = (u64)val1; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_RANGE; + drvdata->addr_val[idx + 1] = (u64)val2; + drvdata->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE; + /* + * Program include or exclude control bits for vinst or vdata + * whenever we change addr comparators to ETM_ADDR_TYPE_RANGE + */ + if (drvdata->mode & ETM_MODE_EXCLUDE) + etm4_set_mode_exclude(drvdata, true); + else + etm4_set_mode_exclude(drvdata, false); + + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_range); + +static ssize_t addr_start_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val = (unsigned long)drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t addr_start_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!drvdata->nr_addr_cmp) { + spin_unlock(&drvdata->spinlock); + return -EINVAL; + } + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = (u64)val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_START; + drvdata->vissctlr |= BIT(idx); + /* SSSTATUS, bit[9] - turn on start/stop logic */ + drvdata->vinst_ctrl |= BIT(9); + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_start); + +static ssize_t addr_stop_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val = (unsigned long)drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t addr_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!drvdata->nr_addr_cmp) { + spin_unlock(&drvdata->spinlock); + return -EINVAL; + } + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = (u64)val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_STOP; + drvdata->vissctlr |= BIT(idx + 16); + /* SSSTATUS, bit[9] - turn on start/stop logic */ + drvdata->vinst_ctrl |= BIT(9); + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_stop); + +static ssize_t addr_ctxtype_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t len; + u8 idx, val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + /* CONTEXTTYPE, bits[3:2] */ + val = BMVAL(drvdata->addr_acc[idx], 2, 3); + len = scnprintf(buf, PAGE_SIZE, "%s\n", val == ETM_CTX_NONE ? "none" : + (val == ETM_CTX_CTXID ? "ctxid" : + (val == ETM_CTX_VMID ? "vmid" : "all"))); + spin_unlock(&drvdata->spinlock); + return len; +} + +static ssize_t addr_ctxtype_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + char str[10] = ""; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (strlen(buf) >= 10) + return -EINVAL; + if (sscanf(buf, "%s", str) != 1) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!strcmp(str, "none")) + /* start by clearing context type bits */ + drvdata->addr_acc[idx] &= ~(BIT(2) | BIT(3)); + else if (!strcmp(str, "ctxid")) { + /* 0b01 The trace unit performs a Context ID */ + if (drvdata->numcidc) { + drvdata->addr_acc[idx] |= BIT(2); + drvdata->addr_acc[idx] &= ~BIT(3); + } + } else if (!strcmp(str, "vmid")) { + /* 0b10 The trace unit performs a VMID */ + if (drvdata->numvmidc) { + drvdata->addr_acc[idx] &= ~BIT(2); + drvdata->addr_acc[idx] |= BIT(3); + } + } else if (!strcmp(str, "all")) { + /* + * 0b11 The trace unit performs a Context ID + * comparison and a VMID + */ + if (drvdata->numcidc) + drvdata->addr_acc[idx] |= BIT(2); + if (drvdata->numvmidc) + drvdata->addr_acc[idx] |= BIT(3); + } + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_ctxtype); + +static ssize_t addr_context_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + /* context ID comparator bits[6:4] */ + val = BMVAL(drvdata->addr_acc[idx], 4, 6); + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t addr_context_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if ((drvdata->numcidc <= 1) && (drvdata->numvmidc <= 1)) + return -EINVAL; + if (val >= (drvdata->numcidc >= drvdata->numvmidc ? + drvdata->numcidc : drvdata->numvmidc)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + /* clear context ID comparator bits[6:4] */ + drvdata->addr_acc[idx] &= ~(BIT(4) | BIT(5) | BIT(6)); + drvdata->addr_acc[idx] |= (val << 4); + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(addr_context); + +static ssize_t seq_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t seq_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->nrseqstate - 1) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->seq_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(seq_idx); + +static ssize_t seq_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_state; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t seq_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->nrseqstate) + return -EINVAL; + + drvdata->seq_state = val; + return size; +} +static DEVICE_ATTR_RW(seq_state); + +static ssize_t seq_event_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->seq_idx; + val = drvdata->seq_ctrl[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t seq_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->seq_idx; + /* RST, bits[7:0] */ + drvdata->seq_ctrl[idx] = val & 0xFF; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(seq_event); + +static ssize_t seq_reset_event_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_rst; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t seq_reset_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (!(drvdata->nrseqstate)) + return -EINVAL; + + drvdata->seq_rst = val & ETMv4_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_reset_event); + +static ssize_t cntr_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->cntr_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t cntr_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->nr_cntr) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->cntr_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(cntr_idx); + +static ssize_t cntrldvr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + val = drvdata->cntrldvr[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t cntrldvr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val > ETM_CNTR_MAX_VAL) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + drvdata->cntrldvr[idx] = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(cntrldvr); + +static ssize_t cntr_val_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + val = drvdata->cntr_val[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t cntr_val_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val > ETM_CNTR_MAX_VAL) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + drvdata->cntr_val[idx] = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(cntr_val); + +static ssize_t cntr_ctrl_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + val = drvdata->cntr_ctrl[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t cntr_ctrl_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->cntr_idx; + drvdata->cntr_ctrl[idx] = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(cntr_ctrl); + +static ssize_t res_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->res_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t res_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + /* Resource selector pair 0 is always implemented and reserved */ + if (val < 2 || val >= drvdata->nr_resource * 2) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->res_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(res_idx); + +static ssize_t res_ctrl_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->res_idx; + val = drvdata->res_ctrl[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t res_ctrl_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->res_idx; + /* For odd idx pair inversal bit is RES0 */ + if (idx % 2 != 0) + /* PAIRINV, bit[21] */ + val &= ~BIT(21); + drvdata->res_ctrl[idx] = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(res_ctrl); + +static ssize_t ctxid_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->ctxid_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t ctxid_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->numcidc) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->ctxid_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(ctxid_idx); + +static ssize_t ctxid_pid_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u8 idx; + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->ctxid_idx; + val = (unsigned long)drvdata->ctxid_vpid[idx]; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t ctxid_pid_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long vpid, pid; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* + * only implemented when ctxid tracing is enabled, i.e. at least one + * ctxid comparator is implemented and ctxid is greater than 0 bits + * in length + */ + if (!drvdata->ctxid_size || !drvdata->numcidc) + return -EINVAL; + if (kstrtoul(buf, 16, &vpid)) + return -EINVAL; + + pid = coresight_vpid_to_pid(vpid); + + spin_lock(&drvdata->spinlock); + idx = drvdata->ctxid_idx; + drvdata->ctxid_pid[idx] = (u64)pid; + drvdata->ctxid_vpid[idx] = (u64)vpid; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(ctxid_pid); + +static ssize_t ctxid_masks_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val1, val2; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val1 = drvdata->ctxid_mask0; + val2 = drvdata->ctxid_mask1; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); +} + +static ssize_t ctxid_masks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 i, j, maskbyte; + unsigned long val1, val2, mask; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* + * only implemented when ctxid tracing is enabled, i.e. at least one + * ctxid comparator is implemented and ctxid is greater than 0 bits + * in length + */ + if (!drvdata->ctxid_size || !drvdata->numcidc) + return -EINVAL; + if (sscanf(buf, "%lx %lx", &val1, &val2) != 2) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + /* + * each byte[0..3] controls mask value applied to ctxid + * comparator[0..3] + */ + switch (drvdata->numcidc) { + case 0x1: + /* COMP0, bits[7:0] */ + drvdata->ctxid_mask0 = val1 & 0xFF; + break; + case 0x2: + /* COMP1, bits[15:8] */ + drvdata->ctxid_mask0 = val1 & 0xFFFF; + break; + case 0x3: + /* COMP2, bits[23:16] */ + drvdata->ctxid_mask0 = val1 & 0xFFFFFF; + break; + case 0x4: + /* COMP3, bits[31:24] */ + drvdata->ctxid_mask0 = val1; + break; + case 0x5: + /* COMP4, bits[7:0] */ + drvdata->ctxid_mask0 = val1; + drvdata->ctxid_mask1 = val2 & 0xFF; + break; + case 0x6: + /* COMP5, bits[15:8] */ + drvdata->ctxid_mask0 = val1; + drvdata->ctxid_mask1 = val2 & 0xFFFF; + break; + case 0x7: + /* COMP6, bits[23:16] */ + drvdata->ctxid_mask0 = val1; + drvdata->ctxid_mask1 = val2 & 0xFFFFFF; + break; + case 0x8: + /* COMP7, bits[31:24] */ + drvdata->ctxid_mask0 = val1; + drvdata->ctxid_mask1 = val2; + break; + default: + break; + } + /* + * If software sets a mask bit to 1, it must program relevant byte + * of ctxid comparator value 0x0, otherwise behavior is unpredictable. + * For example, if bit[3] of ctxid_mask0 is 1, we must clear bits[31:24] + * of ctxid comparator0 value (corresponding to byte 0) register. + */ + mask = drvdata->ctxid_mask0; + for (i = 0; i < drvdata->numcidc; i++) { + /* mask value of corresponding ctxid comparator */ + maskbyte = mask & ETMv4_EVENT_MASK; + /* + * each bit corresponds to a byte of respective ctxid comparator + * value register + */ + for (j = 0; j < 8; j++) { + if (maskbyte & 1) + drvdata->ctxid_pid[i] &= ~(0xFF << (j * 8)); + maskbyte >>= 1; + } + /* Select the next ctxid comparator mask value */ + if (i == 3) + /* ctxid comparators[4-7] */ + mask = drvdata->ctxid_mask1; + else + mask >>= 0x8; + } + + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(ctxid_masks); + +static ssize_t vmid_idx_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->vmid_idx; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t vmid_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + if (val >= drvdata->numvmidc) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->vmid_idx = val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(vmid_idx); + +static ssize_t vmid_val_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = (unsigned long)drvdata->vmid_val[drvdata->vmid_idx]; + return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); +} + +static ssize_t vmid_val_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* + * only implemented when vmid tracing is enabled, i.e. at least one + * vmid comparator is implemented and at least 8 bit vmid size + */ + if (!drvdata->vmid_size || !drvdata->numvmidc) + return -EINVAL; + if (kstrtoul(buf, 16, &val)) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + drvdata->vmid_val[drvdata->vmid_idx] = (u64)val; + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(vmid_val); + +static ssize_t vmid_masks_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val1, val2; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val1 = drvdata->vmid_mask0; + val2 = drvdata->vmid_mask1; + spin_unlock(&drvdata->spinlock); + return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); +} + +static ssize_t vmid_masks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 i, j, maskbyte; + unsigned long val1, val2, mask; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + /* + * only implemented when vmid tracing is enabled, i.e. at least one + * vmid comparator is implemented and at least 8 bit vmid size + */ + if (!drvdata->vmid_size || !drvdata->numvmidc) + return -EINVAL; + if (sscanf(buf, "%lx %lx", &val1, &val2) != 2) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + + /* + * each byte[0..3] controls mask value applied to vmid + * comparator[0..3] + */ + switch (drvdata->numvmidc) { + case 0x1: + /* COMP0, bits[7:0] */ + drvdata->vmid_mask0 = val1 & 0xFF; + break; + case 0x2: + /* COMP1, bits[15:8] */ + drvdata->vmid_mask0 = val1 & 0xFFFF; + break; + case 0x3: + /* COMP2, bits[23:16] */ + drvdata->vmid_mask0 = val1 & 0xFFFFFF; + break; + case 0x4: + /* COMP3, bits[31:24] */ + drvdata->vmid_mask0 = val1; + break; + case 0x5: + /* COMP4, bits[7:0] */ + drvdata->vmid_mask0 = val1; + drvdata->vmid_mask1 = val2 & 0xFF; + break; + case 0x6: + /* COMP5, bits[15:8] */ + drvdata->vmid_mask0 = val1; + drvdata->vmid_mask1 = val2 & 0xFFFF; + break; + case 0x7: + /* COMP6, bits[23:16] */ + drvdata->vmid_mask0 = val1; + drvdata->vmid_mask1 = val2 & 0xFFFFFF; + break; + case 0x8: + /* COMP7, bits[31:24] */ + drvdata->vmid_mask0 = val1; + drvdata->vmid_mask1 = val2; + break; + default: + break; + } + + /* + * If software sets a mask bit to 1, it must program relevant byte + * of vmid comparator value 0x0, otherwise behavior is unpredictable. + * For example, if bit[3] of vmid_mask0 is 1, we must clear bits[31:24] + * of vmid comparator0 value (corresponding to byte 0) register. + */ + mask = drvdata->vmid_mask0; + for (i = 0; i < drvdata->numvmidc; i++) { + /* mask value of corresponding vmid comparator */ + maskbyte = mask & ETMv4_EVENT_MASK; + /* + * each bit corresponds to a byte of respective vmid comparator + * value register + */ + for (j = 0; j < 8; j++) { + if (maskbyte & 1) + drvdata->vmid_val[i] &= ~(0xFF << (j * 8)); + maskbyte >>= 1; + } + /* Select the next vmid comparator mask value */ + if (i == 3) + /* vmid comparators[4-7] */ + mask = drvdata->vmid_mask1; + else + mask >>= 0x8; + } + spin_unlock(&drvdata->spinlock); + return size; +} +static DEVICE_ATTR_RW(vmid_masks); + +static ssize_t cpu_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int val; + struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->cpu; + return scnprintf(buf, PAGE_SIZE, "%d\n", val); + +} +static DEVICE_ATTR_RO(cpu); + +static struct attribute *coresight_etmv4_attrs[] = { + &dev_attr_nr_pe_cmp.attr, + &dev_attr_nr_addr_cmp.attr, + &dev_attr_nr_cntr.attr, + &dev_attr_nr_ext_inp.attr, + &dev_attr_numcidc.attr, + &dev_attr_numvmidc.attr, + &dev_attr_nrseqstate.attr, + &dev_attr_nr_resource.attr, + &dev_attr_nr_ss_cmp.attr, + &dev_attr_reset.attr, + &dev_attr_mode.attr, + &dev_attr_pe.attr, + &dev_attr_event.attr, + &dev_attr_event_instren.attr, + &dev_attr_event_ts.attr, + &dev_attr_syncfreq.attr, + &dev_attr_cyc_threshold.attr, + &dev_attr_bb_ctrl.attr, + &dev_attr_event_vinst.attr, + &dev_attr_s_exlevel_vinst.attr, + &dev_attr_ns_exlevel_vinst.attr, + &dev_attr_addr_idx.attr, + &dev_attr_addr_instdatatype.attr, + &dev_attr_addr_single.attr, + &dev_attr_addr_range.attr, + &dev_attr_addr_start.attr, + &dev_attr_addr_stop.attr, + &dev_attr_addr_ctxtype.attr, + &dev_attr_addr_context.attr, + &dev_attr_seq_idx.attr, + &dev_attr_seq_state.attr, + &dev_attr_seq_event.attr, + &dev_attr_seq_reset_event.attr, + &dev_attr_cntr_idx.attr, + &dev_attr_cntrldvr.attr, + &dev_attr_cntr_val.attr, + &dev_attr_cntr_ctrl.attr, + &dev_attr_res_idx.attr, + &dev_attr_res_ctrl.attr, + &dev_attr_ctxid_idx.attr, + &dev_attr_ctxid_pid.attr, + &dev_attr_ctxid_masks.attr, + &dev_attr_vmid_idx.attr, + &dev_attr_vmid_val.attr, + &dev_attr_vmid_masks.attr, + &dev_attr_cpu.attr, + NULL, +}; + +#define coresight_simple_func(name, offset) \ +static ssize_t name##_show(struct device *_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct etmv4_drvdata *drvdata = dev_get_drvdata(_dev->parent); \ + return scnprintf(buf, PAGE_SIZE, "0x%x\n", \ + readl_relaxed(drvdata->base + offset)); \ +} \ +static DEVICE_ATTR_RO(name) + +coresight_simple_func(trcoslsr, TRCOSLSR); +coresight_simple_func(trcpdcr, TRCPDCR); +coresight_simple_func(trcpdsr, TRCPDSR); +coresight_simple_func(trclsr, TRCLSR); +coresight_simple_func(trcauthstatus, TRCAUTHSTATUS); +coresight_simple_func(trcdevid, TRCDEVID); +coresight_simple_func(trcdevtype, TRCDEVTYPE); +coresight_simple_func(trcpidr0, TRCPIDR0); +coresight_simple_func(trcpidr1, TRCPIDR1); +coresight_simple_func(trcpidr2, TRCPIDR2); +coresight_simple_func(trcpidr3, TRCPIDR3); + +static struct attribute *coresight_etmv4_mgmt_attrs[] = { + &dev_attr_trcoslsr.attr, + &dev_attr_trcpdcr.attr, + &dev_attr_trcpdsr.attr, + &dev_attr_trclsr.attr, + &dev_attr_trcauthstatus.attr, + &dev_attr_trcdevid.attr, + &dev_attr_trcdevtype.attr, + &dev_attr_trcpidr0.attr, + &dev_attr_trcpidr1.attr, + &dev_attr_trcpidr2.attr, + &dev_attr_trcpidr3.attr, + NULL, +}; + +coresight_simple_func(trcidr0, TRCIDR0); +coresight_simple_func(trcidr1, TRCIDR1); +coresight_simple_func(trcidr2, TRCIDR2); +coresight_simple_func(trcidr3, TRCIDR3); +coresight_simple_func(trcidr4, TRCIDR4); +coresight_simple_func(trcidr5, TRCIDR5); +/* trcidr[6,7] are reserved */ +coresight_simple_func(trcidr8, TRCIDR8); +coresight_simple_func(trcidr9, TRCIDR9); +coresight_simple_func(trcidr10, TRCIDR10); +coresight_simple_func(trcidr11, TRCIDR11); +coresight_simple_func(trcidr12, TRCIDR12); +coresight_simple_func(trcidr13, TRCIDR13); + +static struct attribute *coresight_etmv4_trcidr_attrs[] = { + &dev_attr_trcidr0.attr, + &dev_attr_trcidr1.attr, + &dev_attr_trcidr2.attr, + &dev_attr_trcidr3.attr, + &dev_attr_trcidr4.attr, + &dev_attr_trcidr5.attr, + /* trcidr[6,7] are reserved */ + &dev_attr_trcidr8.attr, + &dev_attr_trcidr9.attr, + &dev_attr_trcidr10.attr, + &dev_attr_trcidr11.attr, + &dev_attr_trcidr12.attr, + &dev_attr_trcidr13.attr, + NULL, +}; + +static const struct attribute_group coresight_etmv4_group = { + .attrs = coresight_etmv4_attrs, +}; + +static const struct attribute_group coresight_etmv4_mgmt_group = { + .attrs = coresight_etmv4_mgmt_attrs, + .name = "mgmt", +}; + +static const struct attribute_group coresight_etmv4_trcidr_group = { + .attrs = coresight_etmv4_trcidr_attrs, + .name = "trcidr", +}; + +static const struct attribute_group *coresight_etmv4_groups[] = { + &coresight_etmv4_group, + &coresight_etmv4_mgmt_group, + &coresight_etmv4_trcidr_group, + NULL, +}; + static void etm4_init_arch_data(void *info) { u32 etmidr0; @@ -408,9 +2311,6 @@ static void etm4_init_arch_data(void *info) u32 etmidr5; struct etmv4_drvdata *drvdata = info; - /* Make sure all registers are accessible */ - etm4_os_unlock(drvdata); - CS_UNLOCK(drvdata->base); /* find all capabilities of the tracing unit */ @@ -562,115 +2462,93 @@ static void etm4_init_arch_data(void *info) CS_LOCK(drvdata->base); } -static void etm4_set_default(struct etmv4_config *config) +static void etm4_init_default_data(struct etmv4_drvdata *drvdata) { - if (WARN_ON_ONCE(!config)) - return; + int i; - /* - * Make default initialisation trace everything - * - * Select the "always true" resource selector on the - * "Enablign Event" line and configure address range comparator - * '0' to trace all the possible address range. From there - * configure the "include/exclude" engine to include address - * range comparator '0'. - */ + drvdata->pe_sel = 0x0; + drvdata->cfg = (ETMv4_MODE_CTXID | ETM_MODE_VMID | + ETMv4_MODE_TIMESTAMP | ETM_MODE_RETURNSTACK); /* disable all events tracing */ - config->eventctrl0 = 0x0; - config->eventctrl1 = 0x0; + drvdata->eventctrl0 = 0x0; + drvdata->eventctrl1 = 0x0; /* disable stalling */ - config->stall_ctrl = 0x0; - - /* enable trace synchronization every 4096 bytes, if available */ - config->syncfreq = 0xC; + drvdata->stall_ctrl = 0x0; /* disable timestamp event */ - config->ts_ctrl = 0x0; + drvdata->ts_ctrl = 0x0; - /* TRCVICTLR::EVENT = 0x01, select the always on logic */ - config->vinst_ctrl |= BIT(0); + /* enable trace synchronization every 4096 bytes for trace */ + if (drvdata->syncpr == false) + drvdata->syncfreq = 0xC; /* - * TRCVICTLR::SSSTATUS == 1, the start-stop logic is - * in the started state + * enable viewInst to trace everything with start-stop logic in + * started state */ - config->vinst_ctrl |= BIT(9); - - /* - * Configure address range comparator '0' to encompass all - * possible addresses. - */ - - /* First half of default address comparator: start at address 0 */ - config->addr_val[ETM_DEFAULT_ADDR_COMP] = 0x0; - /* trace instruction addresses */ - config->addr_acc[ETM_DEFAULT_ADDR_COMP] &= ~(BIT(0) | BIT(1)); - /* EXLEVEL_NS, bits[12:15], only trace application and kernel space */ - config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= ETM_EXLEVEL_NS_HYP; - /* EXLEVEL_S, bits[11:8], don't trace anything in secure state */ - config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= (ETM_EXLEVEL_S_APP | - ETM_EXLEVEL_S_OS | - ETM_EXLEVEL_S_HYP); - config->addr_type[ETM_DEFAULT_ADDR_COMP] = ETM_ADDR_TYPE_RANGE; - - /* - * Second half of default address comparator: go all - * the way to the top. - */ - config->addr_val[ETM_DEFAULT_ADDR_COMP + 1] = ~0x0; - /* trace instruction addresses */ - config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] &= ~(BIT(0) | BIT(1)); - /* Address comparator type must be equal for both halves */ - config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = - config->addr_acc[ETM_DEFAULT_ADDR_COMP]; - config->addr_type[ETM_DEFAULT_ADDR_COMP + 1] = ETM_ADDR_TYPE_RANGE; - - /* - * Configure the ViewInst function to filter on address range - * comparator '0'. - */ - config->viiectlr = BIT(0); + drvdata->vinst_ctrl |= BIT(0); + /* set initial state of start-stop logic */ + if (drvdata->nr_addr_cmp) + drvdata->vinst_ctrl |= BIT(9); + /* no address range filtering for ViewInst */ + drvdata->viiectlr = 0x0; /* no start-stop filtering for ViewInst */ - config->vissctlr = 0x0; -} + drvdata->vissctlr = 0x0; -void etm4_config_trace_mode(struct etmv4_config *config) -{ - u32 addr_acc, mode; + /* disable seq events */ + for (i = 0; i < drvdata->nrseqstate-1; i++) + drvdata->seq_ctrl[i] = 0x0; + drvdata->seq_rst = 0x0; + drvdata->seq_state = 0x0; - mode = config->mode; - mode &= (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER); + /* disable external input events */ + drvdata->ext_inp = 0x0; - /* excluding kernel AND user space doesn't make sense */ - WARN_ON_ONCE(mode == (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER)); + for (i = 0; i < drvdata->nr_cntr; i++) { + drvdata->cntrldvr[i] = 0x0; + drvdata->cntr_ctrl[i] = 0x0; + drvdata->cntr_val[i] = 0x0; + } - /* nothing to do if neither flags are set */ - if (!(mode & ETM_MODE_EXCL_KERN) && !(mode & ETM_MODE_EXCL_USER)) - return; + /* Resource selector pair 0 is always implemented and reserved */ + drvdata->res_idx = 0x2; + for (i = 2; i < drvdata->nr_resource * 2; i++) + drvdata->res_ctrl[i] = 0x0; - addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP]; - /* clear default config */ - addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS); + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + drvdata->ss_ctrl[i] = 0x0; + drvdata->ss_pe_cmp[i] = 0x0; + } + + if (drvdata->nr_addr_cmp >= 1) { + drvdata->addr_val[0] = (unsigned long)_stext; + drvdata->addr_val[1] = (unsigned long)_etext; + drvdata->addr_type[0] = ETM_ADDR_TYPE_RANGE; + drvdata->addr_type[1] = ETM_ADDR_TYPE_RANGE; + } + + for (i = 0; i < drvdata->numcidc; i++) { + drvdata->ctxid_pid[i] = 0x0; + drvdata->ctxid_vpid[i] = 0x0; + } + + drvdata->ctxid_mask0 = 0x0; + drvdata->ctxid_mask1 = 0x0; + + for (i = 0; i < drvdata->numvmidc; i++) + drvdata->vmid_val[i] = 0x0; + drvdata->vmid_mask0 = 0x0; + drvdata->vmid_mask1 = 0x0; /* - * EXLEVEL_NS, bits[15:12] - * The Exception levels are: - * Bit[12] Exception level 0 - Application - * Bit[13] Exception level 1 - OS - * Bit[14] Exception level 2 - Hypervisor - * Bit[15] Never implemented + * A trace ID value of 0 is invalid, so let's start at some + * random value that fits in 7 bits. ETMv3.x has 0x10 so let's + * start at 0x20. */ - if (mode & ETM_MODE_EXCL_KERN) - addr_acc |= ETM_EXLEVEL_NS_OS; - else - addr_acc |= ETM_EXLEVEL_NS_APP; - - config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc; - config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc; + drvdata->trcid = 0x20 + drvdata->cpu; } static int etm4_cpu_callback(struct notifier_block *nfb, unsigned long action, @@ -689,7 +2567,7 @@ static int etm4_cpu_callback(struct notifier_block *nfb, unsigned long action, etmdrvdata[cpu]->os_unlock = true; } - if (local_read(&etmdrvdata[cpu]->mode)) + if (etmdrvdata[cpu]->enable) etm4_enable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock); break; @@ -702,7 +2580,7 @@ static int etm4_cpu_callback(struct notifier_block *nfb, unsigned long action, case CPU_DYING: spin_lock(&etmdrvdata[cpu]->spinlock); - if (local_read(&etmdrvdata[cpu]->mode)) + if (etmdrvdata[cpu]->enable) etm4_disable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock); break; @@ -715,11 +2593,6 @@ static struct notifier_block etm4_cpu_notifier = { .notifier_call = etm4_cpu_callback, }; -static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) -{ - drvdata->trcid = coresight_get_trace_id(drvdata->cpu); -} - static int etm4_probe(struct amba_device *adev, const struct amba_id *id) { int ret; @@ -763,6 +2636,9 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) get_online_cpus(); etmdrvdata[drvdata->cpu] = drvdata; + if (!smp_call_function_single(drvdata->cpu, etm4_os_unlock, drvdata, 1)) + drvdata->os_unlock = true; + if (smp_call_function_single(drvdata->cpu, etm4_init_arch_data, drvdata, 1)) dev_err(dev, "ETM arch init failed\n"); @@ -776,9 +2652,9 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) ret = -EINVAL; goto err_arch_supported; } + etm4_init_default_data(drvdata); - etm4_init_trace_id(drvdata); - etm4_set_default(&drvdata->config); + pm_runtime_put(&adev->dev); desc->type = CORESIGHT_DEV_TYPE_SOURCE; desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC; @@ -789,16 +2665,9 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) drvdata->csdev = coresight_register(desc); if (IS_ERR(drvdata->csdev)) { ret = PTR_ERR(drvdata->csdev); - goto err_arch_supported; + goto err_coresight_register; } - ret = etm_perf_symlink(drvdata->csdev, true); - if (ret) { - coresight_unregister(drvdata->csdev); - goto err_arch_supported; - } - - pm_runtime_put(&adev->dev); dev_info(dev, "%s initialized\n", (char *)id->data); if (boot_enable) { @@ -809,6 +2678,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) return 0; err_arch_supported: + pm_runtime_put(&adev->dev); +err_coresight_register: if (--etm4_count == 0) unregister_hotcpu_notifier(&etm4_cpu_notifier); return ret; @@ -825,11 +2696,6 @@ static struct amba_id etm4_ids[] = { .mask = 0x000fffff, .data = "ETM 4.0", }, - { /* ETM 4.0 - A72, Maia, HiSilicon */ - .id = 0x000bb95a, - .mask = 0x000fffff, - .data = "ETM 4.0", - }, { 0, 0}, }; @@ -841,4 +2707,5 @@ static struct amba_driver etm4x_driver = { .probe = etm4_probe, .id_table = etm4_ids, }; -builtin_amba_driver(etm4x_driver); + +module_amba_driver(etm4x_driver); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 05df789056cc..25e8ea140a09 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -1,6 +1,4 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. - * - * Description: CoreSight Funnel driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -13,6 +11,7 @@ */ #include +#include #include #include #include @@ -70,6 +69,7 @@ static int funnel_enable(struct coresight_device *csdev, int inport, { struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + pm_runtime_get_sync(drvdata->dev); funnel_enable_hw(drvdata, inport); dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport); @@ -95,6 +95,7 @@ static void funnel_disable(struct coresight_device *csdev, int inport, struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); funnel_disable_hw(drvdata, inport); + pm_runtime_put(drvdata->dev); dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport); } @@ -221,6 +222,7 @@ static int funnel_probe(struct amba_device *adev, const struct amba_id *id) if (IS_ERR(drvdata->csdev)) return PTR_ERR(drvdata->csdev); + dev_info(dev, "FUNNEL initialized\n"); return 0; } @@ -268,4 +270,8 @@ static struct amba_driver funnel_driver = { .probe = funnel_probe, .id_table = funnel_ids, }; -builtin_amba_driver(funnel_driver); + +module_amba_driver(funnel_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Funnel driver"); diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 3978cbb6b038..c4fa70ed14ce 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -1,6 +1,4 @@ /* Copyright (c) 2012, The Linux Foundation. All rights reserved. - * - * Description: CoreSight Trace Memory Controller driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -13,6 +11,7 @@ */ #include +#include #include #include #include @@ -30,27 +29,127 @@ #include #include "coresight-priv.h" -#include "coresight-tmc.h" -void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata) +#define TMC_RSZ 0x004 +#define TMC_STS 0x00c +#define TMC_RRD 0x010 +#define TMC_RRP 0x014 +#define TMC_RWP 0x018 +#define TMC_TRG 0x01c +#define TMC_CTL 0x020 +#define TMC_RWD 0x024 +#define TMC_MODE 0x028 +#define TMC_LBUFLEVEL 0x02c +#define TMC_CBUFLEVEL 0x030 +#define TMC_BUFWM 0x034 +#define TMC_RRPHI 0x038 +#define TMC_RWPHI 0x03c +#define TMC_AXICTL 0x110 +#define TMC_DBALO 0x118 +#define TMC_DBAHI 0x11c +#define TMC_FFSR 0x300 +#define TMC_FFCR 0x304 +#define TMC_PSCR 0x308 +#define TMC_ITMISCOP0 0xee0 +#define TMC_ITTRFLIN 0xee8 +#define TMC_ITATBDATA0 0xeec +#define TMC_ITATBCTR2 0xef0 +#define TMC_ITATBCTR1 0xef4 +#define TMC_ITATBCTR0 0xef8 + +/* register description */ +/* TMC_CTL - 0x020 */ +#define TMC_CTL_CAPT_EN BIT(0) +/* TMC_STS - 0x00C */ +#define TMC_STS_TRIGGERED BIT(1) +/* TMC_AXICTL - 0x110 */ +#define TMC_AXICTL_PROT_CTL_B0 BIT(0) +#define TMC_AXICTL_PROT_CTL_B1 BIT(1) +#define TMC_AXICTL_SCT_GAT_MODE BIT(7) +#define TMC_AXICTL_WR_BURST_LEN 0xF00 +/* TMC_FFCR - 0x304 */ +#define TMC_FFCR_EN_FMT BIT(0) +#define TMC_FFCR_EN_TI BIT(1) +#define TMC_FFCR_FON_FLIN BIT(4) +#define TMC_FFCR_FON_TRIG_EVT BIT(5) +#define TMC_FFCR_FLUSHMAN BIT(6) +#define TMC_FFCR_TRIGON_TRIGIN BIT(8) +#define TMC_FFCR_STOP_ON_FLUSH BIT(12) + +#define TMC_STS_TRIGGERED_BIT 2 +#define TMC_FFCR_FLUSHMAN_BIT 6 + +enum tmc_config_type { + TMC_CONFIG_TYPE_ETB, + TMC_CONFIG_TYPE_ETR, + TMC_CONFIG_TYPE_ETF, +}; + +enum tmc_mode { + TMC_MODE_CIRCULAR_BUFFER, + TMC_MODE_SOFTWARE_FIFO, + TMC_MODE_HARDWARE_FIFO, +}; + +enum tmc_mem_intf_width { + TMC_MEM_INTF_WIDTH_32BITS = 0x2, + TMC_MEM_INTF_WIDTH_64BITS = 0x3, + TMC_MEM_INTF_WIDTH_128BITS = 0x4, + TMC_MEM_INTF_WIDTH_256BITS = 0x5, +}; + +/** + * struct tmc_drvdata - specifics associated to an TMC component + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @miscdev: specifics to handle "/dev/xyz.tmc" entry. + * @spinlock: only one at a time pls. + * @read_count: manages preparation of buffer for reading. + * @buf: area of memory where trace data get sent. + * @paddr: DMA start location in RAM. + * @vaddr: virtual representation of @paddr. + * @size: @buf size. + * @enable: this TMC is being used. + * @config_type: TMC variant, must be of type @tmc_config_type. + * @trigger_cntr: amount of words to store after a trigger. + */ +struct tmc_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct miscdevice miscdev; + spinlock_t spinlock; + int read_count; + bool reading; + char *buf; + dma_addr_t paddr; + void *vaddr; + u32 size; + bool enable; + enum tmc_config_type config_type; + u32 trigger_cntr; +}; + +static void tmc_wait_for_ready(struct tmc_drvdata *drvdata) { /* Ensure formatter, unformatter and hardware fifo are empty */ if (coresight_timeout(drvdata->base, - TMC_STS, TMC_STS_TMCREADY_BIT, 1)) { + TMC_STS, TMC_STS_TRIGGERED_BIT, 1)) { dev_err(drvdata->dev, "timeout observed when probing at offset %#x\n", TMC_STS); } } -void tmc_flush_and_stop(struct tmc_drvdata *drvdata) +static void tmc_flush_and_stop(struct tmc_drvdata *drvdata) { u32 ffcr; ffcr = readl_relaxed(drvdata->base + TMC_FFCR); ffcr |= TMC_FFCR_STOP_ON_FLUSH; writel_relaxed(ffcr, drvdata->base + TMC_FFCR); - ffcr |= BIT(TMC_FFCR_FLUSHMAN_BIT); + ffcr |= TMC_FFCR_FLUSHMAN; writel_relaxed(ffcr, drvdata->base + TMC_FFCR); /* Ensure flush completes */ if (coresight_timeout(drvdata->base, @@ -60,73 +159,343 @@ void tmc_flush_and_stop(struct tmc_drvdata *drvdata) TMC_FFCR); } - tmc_wait_for_tmcready(drvdata); + tmc_wait_for_ready(drvdata); } -void tmc_enable_hw(struct tmc_drvdata *drvdata) +static void tmc_enable_hw(struct tmc_drvdata *drvdata) { writel_relaxed(TMC_CTL_CAPT_EN, drvdata->base + TMC_CTL); } -void tmc_disable_hw(struct tmc_drvdata *drvdata) +static void tmc_disable_hw(struct tmc_drvdata *drvdata) { writel_relaxed(0x0, drvdata->base + TMC_CTL); } -static int tmc_read_prepare(struct tmc_drvdata *drvdata) +static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { - int ret = 0; + /* Zero out the memory to help with debug */ + memset(drvdata->buf, 0, drvdata->size); - switch (drvdata->config_type) { - case TMC_CONFIG_TYPE_ETB: - case TMC_CONFIG_TYPE_ETF: - ret = tmc_read_prepare_etb(drvdata); - break; - case TMC_CONFIG_TYPE_ETR: - ret = tmc_read_prepare_etr(drvdata); - break; - default: - ret = -EINVAL; + CS_UNLOCK(drvdata->base); + + writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | + TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | + TMC_FFCR_TRIGON_TRIGIN, + drvdata->base + TMC_FFCR); + + writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) +{ + u32 axictl; + + /* Zero out the memory to help with debug */ + memset(drvdata->vaddr, 0, drvdata->size); + + CS_UNLOCK(drvdata->base); + + writel_relaxed(drvdata->size / 4, drvdata->base + TMC_RSZ); + writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); + + axictl = readl_relaxed(drvdata->base + TMC_AXICTL); + axictl |= TMC_AXICTL_WR_BURST_LEN; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + axictl &= ~TMC_AXICTL_SCT_GAT_MODE; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + axictl = (axictl & + ~(TMC_AXICTL_PROT_CTL_B0 | TMC_AXICTL_PROT_CTL_B1)) | + TMC_AXICTL_PROT_CTL_B1; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + + writel_relaxed(drvdata->paddr, drvdata->base + TMC_DBALO); + writel_relaxed(0x0, drvdata->base + TMC_DBAHI); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | + TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | + TMC_FFCR_TRIGON_TRIGIN, + drvdata->base + TMC_FFCR); + writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + writel_relaxed(TMC_MODE_HARDWARE_FIFO, drvdata->base + TMC_MODE); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI, + drvdata->base + TMC_FFCR); + writel_relaxed(0x0, drvdata->base + TMC_BUFWM); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static int tmc_enable(struct tmc_drvdata *drvdata, enum tmc_mode mode) +{ + unsigned long flags; + + pm_runtime_get_sync(drvdata->dev); + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + pm_runtime_put(drvdata->dev); + return -EBUSY; } - if (!ret) - dev_info(drvdata->dev, "TMC read start\n"); + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_enable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_enable_hw(drvdata); + } else { + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_enable_hw(drvdata); + else + tmc_etf_enable_hw(drvdata); + } + drvdata->enable = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + dev_info(drvdata->dev, "TMC enabled\n"); + return 0; +} + +static int tmc_enable_sink(struct coresight_device *csdev) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return tmc_enable(drvdata, TMC_MODE_CIRCULAR_BUFFER); +} + +static int tmc_enable_link(struct coresight_device *csdev, int inport, + int outport) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return tmc_enable(drvdata, TMC_MODE_HARDWARE_FIFO); +} + +static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata) +{ + enum tmc_mem_intf_width memwidth; + u8 memwords; + char *bufp; + u32 read_data; + int i; + + memwidth = BMVAL(readl_relaxed(drvdata->base + CORESIGHT_DEVID), 8, 10); + if (memwidth == TMC_MEM_INTF_WIDTH_32BITS) + memwords = 1; + else if (memwidth == TMC_MEM_INTF_WIDTH_64BITS) + memwords = 2; + else if (memwidth == TMC_MEM_INTF_WIDTH_128BITS) + memwords = 4; + else + memwords = 8; + + bufp = drvdata->buf; + while (1) { + for (i = 0; i < memwords; i++) { + read_data = readl_relaxed(drvdata->base + TMC_RRD); + if (read_data == 0xFFFFFFFF) + return; + memcpy(bufp, &read_data, 4); + bufp += 4; + } + } +} + +static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_etb_dump_hw(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) +{ + u32 rwp, val; + + rwp = readl_relaxed(drvdata->base + TMC_RWP); + val = readl_relaxed(drvdata->base + TMC_STS); + + /* How much memory do we still have */ + if (val & BIT(0)) + drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr; + else + drvdata->buf = drvdata->vaddr; +} + +static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_etr_dump_hw(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_disable(struct tmc_drvdata *drvdata, enum tmc_mode mode) +{ + unsigned long flags; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_disable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_disable_hw(drvdata); + } else { + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_disable_hw(drvdata); + else + tmc_etf_disable_hw(drvdata); + } +out: + drvdata->enable = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + pm_runtime_put(drvdata->dev); + + dev_info(drvdata->dev, "TMC disabled\n"); +} + +static void tmc_disable_sink(struct coresight_device *csdev) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + tmc_disable(drvdata, TMC_MODE_CIRCULAR_BUFFER); +} + +static void tmc_disable_link(struct coresight_device *csdev, int inport, + int outport) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + tmc_disable(drvdata, TMC_MODE_HARDWARE_FIFO); +} + +static const struct coresight_ops_sink tmc_sink_ops = { + .enable = tmc_enable_sink, + .disable = tmc_disable_sink, +}; + +static const struct coresight_ops_link tmc_link_ops = { + .enable = tmc_enable_link, + .disable = tmc_disable_link, +}; + +static const struct coresight_ops tmc_etb_cs_ops = { + .sink_ops = &tmc_sink_ops, +}; + +static const struct coresight_ops tmc_etr_cs_ops = { + .sink_ops = &tmc_sink_ops, +}; + +static const struct coresight_ops tmc_etf_cs_ops = { + .sink_ops = &tmc_sink_ops, + .link_ops = &tmc_link_ops, +}; + +static int tmc_read_prepare(struct tmc_drvdata *drvdata) +{ + int ret; + unsigned long flags; + enum tmc_mode mode; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (!drvdata->enable) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_disable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_disable_hw(drvdata); + } else { + mode = readl_relaxed(drvdata->base + TMC_MODE); + if (mode == TMC_MODE_CIRCULAR_BUFFER) { + tmc_etb_disable_hw(drvdata); + } else { + ret = -ENODEV; + goto err; + } + } +out: + drvdata->reading = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "TMC read start\n"); + return 0; +err: + spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } -static int tmc_read_unprepare(struct tmc_drvdata *drvdata) +static void tmc_read_unprepare(struct tmc_drvdata *drvdata) { - int ret = 0; + unsigned long flags; + enum tmc_mode mode; - switch (drvdata->config_type) { - case TMC_CONFIG_TYPE_ETB: - case TMC_CONFIG_TYPE_ETF: - ret = tmc_read_unprepare_etb(drvdata); - break; - case TMC_CONFIG_TYPE_ETR: - ret = tmc_read_unprepare_etr(drvdata); - break; - default: - ret = -EINVAL; + spin_lock_irqsave(&drvdata->spinlock, flags); + if (!drvdata->enable) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_enable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_enable_hw(drvdata); + } else { + mode = readl_relaxed(drvdata->base + TMC_MODE); + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_enable_hw(drvdata); } +out: + drvdata->reading = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); - if (!ret) - dev_info(drvdata->dev, "TMC read end\n"); - - return ret; + dev_info(drvdata->dev, "TMC read end\n"); } static int tmc_open(struct inode *inode, struct file *file) { - int ret; struct tmc_drvdata *drvdata = container_of(file->private_data, struct tmc_drvdata, miscdev); + int ret = 0; + + if (drvdata->read_count++) + goto out; ret = tmc_read_prepare(drvdata); if (ret) return ret; - +out: nonseekable_open(inode, file); dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__); @@ -166,14 +535,19 @@ static ssize_t tmc_read(struct file *file, char __user *data, size_t len, static int tmc_release(struct inode *inode, struct file *file) { - int ret; struct tmc_drvdata *drvdata = container_of(file->private_data, struct tmc_drvdata, miscdev); - ret = tmc_read_unprepare(drvdata); - if (ret) - return ret; + if (--drvdata->read_count) { + if (drvdata->read_count < 0) { + dev_err(drvdata->dev, "mismatched close\n"); + drvdata->read_count = 0; + } + goto out; + } + tmc_read_unprepare(drvdata); +out: dev_dbg(drvdata->dev, "%s: released\n", __func__); return 0; } @@ -186,71 +560,56 @@ static const struct file_operations tmc_fops = { .llseek = no_llseek, }; -static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid) +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, char *buf) { - enum tmc_mem_intf_width memwidth; + unsigned long flags; + u32 tmc_rsz, tmc_sts, tmc_rrp, tmc_rwp, tmc_trg; + u32 tmc_ctl, tmc_ffsr, tmc_ffcr, tmc_mode, tmc_pscr; + u32 devid; + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); - /* - * Excerpt from the TRM: - * - * DEVID::MEMWIDTH[10:8] - * 0x2 Memory interface databus is 32 bits wide. - * 0x3 Memory interface databus is 64 bits wide. - * 0x4 Memory interface databus is 128 bits wide. - * 0x5 Memory interface databus is 256 bits wide. - */ - switch (BMVAL(devid, 8, 10)) { - case 0x2: - memwidth = TMC_MEM_INTF_WIDTH_32BITS; - break; - case 0x3: - memwidth = TMC_MEM_INTF_WIDTH_64BITS; - break; - case 0x4: - memwidth = TMC_MEM_INTF_WIDTH_128BITS; - break; - case 0x5: - memwidth = TMC_MEM_INTF_WIDTH_256BITS; - break; - default: - memwidth = 0; - } + pm_runtime_get_sync(drvdata->dev); + spin_lock_irqsave(&drvdata->spinlock, flags); + CS_UNLOCK(drvdata->base); - return memwidth; + tmc_rsz = readl_relaxed(drvdata->base + TMC_RSZ); + tmc_sts = readl_relaxed(drvdata->base + TMC_STS); + tmc_rrp = readl_relaxed(drvdata->base + TMC_RRP); + tmc_rwp = readl_relaxed(drvdata->base + TMC_RWP); + tmc_trg = readl_relaxed(drvdata->base + TMC_TRG); + tmc_ctl = readl_relaxed(drvdata->base + TMC_CTL); + tmc_ffsr = readl_relaxed(drvdata->base + TMC_FFSR); + tmc_ffcr = readl_relaxed(drvdata->base + TMC_FFCR); + tmc_mode = readl_relaxed(drvdata->base + TMC_MODE); + tmc_pscr = readl_relaxed(drvdata->base + TMC_PSCR); + devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); + + CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + pm_runtime_put(drvdata->dev); + + return sprintf(buf, + "Depth:\t\t0x%x\n" + "Status:\t\t0x%x\n" + "RAM read ptr:\t0x%x\n" + "RAM wrt ptr:\t0x%x\n" + "Trigger cnt:\t0x%x\n" + "Control:\t0x%x\n" + "Flush status:\t0x%x\n" + "Flush ctrl:\t0x%x\n" + "Mode:\t\t0x%x\n" + "PSRC:\t\t0x%x\n" + "DEVID:\t\t0x%x\n", + tmc_rsz, tmc_sts, tmc_rrp, tmc_rwp, tmc_trg, + tmc_ctl, tmc_ffsr, tmc_ffcr, tmc_mode, tmc_pscr, devid); + + return -EINVAL; } +static DEVICE_ATTR_RO(status); -#define coresight_tmc_simple_func(name, offset) \ - coresight_simple_func(struct tmc_drvdata, name, offset) - -coresight_tmc_simple_func(rsz, TMC_RSZ); -coresight_tmc_simple_func(sts, TMC_STS); -coresight_tmc_simple_func(rrp, TMC_RRP); -coresight_tmc_simple_func(rwp, TMC_RWP); -coresight_tmc_simple_func(trg, TMC_TRG); -coresight_tmc_simple_func(ctl, TMC_CTL); -coresight_tmc_simple_func(ffsr, TMC_FFSR); -coresight_tmc_simple_func(ffcr, TMC_FFCR); -coresight_tmc_simple_func(mode, TMC_MODE); -coresight_tmc_simple_func(pscr, TMC_PSCR); -coresight_tmc_simple_func(devid, CORESIGHT_DEVID); - -static struct attribute *coresight_tmc_mgmt_attrs[] = { - &dev_attr_rsz.attr, - &dev_attr_sts.attr, - &dev_attr_rrp.attr, - &dev_attr_rwp.attr, - &dev_attr_trg.attr, - &dev_attr_ctl.attr, - &dev_attr_ffsr.attr, - &dev_attr_ffcr.attr, - &dev_attr_mode.attr, - &dev_attr_pscr.attr, - &dev_attr_devid.attr, - NULL, -}; - -ssize_t trigger_cntr_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t trigger_cntr_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); unsigned long val = drvdata->trigger_cntr; @@ -275,25 +634,26 @@ static ssize_t trigger_cntr_store(struct device *dev, } static DEVICE_ATTR_RW(trigger_cntr); -static struct attribute *coresight_tmc_attrs[] = { +static struct attribute *coresight_etb_attrs[] = { &dev_attr_trigger_cntr.attr, + &dev_attr_status.attr, NULL, }; +ATTRIBUTE_GROUPS(coresight_etb); -static const struct attribute_group coresight_tmc_group = { - .attrs = coresight_tmc_attrs, -}; - -static const struct attribute_group coresight_tmc_mgmt_group = { - .attrs = coresight_tmc_mgmt_attrs, - .name = "mgmt", -}; - -const struct attribute_group *coresight_tmc_groups[] = { - &coresight_tmc_group, - &coresight_tmc_mgmt_group, +static struct attribute *coresight_etr_attrs[] = { + &dev_attr_trigger_cntr.attr, + &dev_attr_status.attr, NULL, }; +ATTRIBUTE_GROUPS(coresight_etr); + +static struct attribute *coresight_etf_attrs[] = { + &dev_attr_trigger_cntr.attr, + &dev_attr_status.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etf); static int tmc_probe(struct amba_device *adev, const struct amba_id *id) { @@ -332,7 +692,6 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); drvdata->config_type = BMVAL(devid, 6, 7); - drvdata->memwidth = tmc_get_memwidth(devid); if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { if (np) @@ -347,6 +706,20 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) pm_runtime_put(&adev->dev); + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + drvdata->vaddr = dma_alloc_coherent(dev, drvdata->size, + &drvdata->paddr, GFP_KERNEL); + if (!drvdata->vaddr) + return -ENOMEM; + + memset(drvdata->vaddr, 0, drvdata->size); + drvdata->buf = drvdata->vaddr; + } else { + drvdata->buf = devm_kzalloc(dev, drvdata->size, GFP_KERNEL); + if (!drvdata->buf) + return -ENOMEM; + } + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); if (!desc) { ret = -ENOMEM; @@ -356,18 +729,20 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) desc->pdata = pdata; desc->dev = dev; desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER; - desc->groups = coresight_tmc_groups; if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { desc->type = CORESIGHT_DEV_TYPE_SINK; desc->ops = &tmc_etb_cs_ops; + desc->groups = coresight_etb_groups; } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { desc->type = CORESIGHT_DEV_TYPE_SINK; desc->ops = &tmc_etr_cs_ops; + desc->groups = coresight_etr_groups; } else { desc->type = CORESIGHT_DEV_TYPE_LINKSINK; desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO; desc->ops = &tmc_etf_cs_ops; + desc->groups = coresight_etf_groups; } drvdata->csdev = coresight_register(desc); @@ -383,11 +758,15 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) if (ret) goto err_misc_register; + dev_info(dev, "TMC initialized\n"); return 0; err_misc_register: coresight_unregister(drvdata->csdev); err_devm_kzalloc: + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) + dma_free_coherent(dev, drvdata->size, + drvdata->vaddr, drvdata->paddr); return ret; } @@ -408,4 +787,8 @@ static struct amba_driver tmc_driver = { .probe = tmc_probe, .id_table = tmc_ids, }; -builtin_amba_driver(tmc_driver); + +module_amba_driver(tmc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Trace Memory Controller driver"); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index ee4225d73994..105c192eb2c1 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -1,6 +1,4 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. - * - * Description: CoreSight Trace Port Interface Unit driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -13,6 +11,7 @@ */ #include +#include #include #include #include @@ -75,10 +74,11 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tpiu_enable(struct coresight_device *csdev, u32 mode) +static int tpiu_enable(struct coresight_device *csdev) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + pm_runtime_get_sync(csdev->dev.parent); tpiu_enable_hw(drvdata); dev_info(drvdata->dev, "TPIU enabled\n"); @@ -106,6 +106,7 @@ static void tpiu_disable(struct coresight_device *csdev) struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); tpiu_disable_hw(drvdata); + pm_runtime_put(csdev->dev.parent); dev_info(drvdata->dev, "TPIU disabled\n"); } @@ -175,6 +176,7 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) if (IS_ERR(drvdata->csdev)) return PTR_ERR(drvdata->csdev); + dev_info(dev, "TPIU initialized\n"); return 0; } @@ -226,4 +228,8 @@ static struct amba_driver tpiu_driver = { .probe = tpiu_probe, .id_table = tpiu_ids, }; -builtin_amba_driver(tpiu_driver); + +module_amba_driver(tpiu_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Trace Port Interface Unit driver"); diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c index 2dc5378ccd3a..eb43943cdf07 100644 --- a/drivers/hwtracing/intel_th/gth.c +++ b/drivers/hwtracing/intel_th/gth.c @@ -591,11 +591,15 @@ static void intel_th_gth_unassign(struct intel_th_device *thdev, { struct gth_device *gth = dev_get_drvdata(&thdev->dev); int port = othdev->output.port; + int master; spin_lock(>h->gth_lock); othdev->output.port = -1; othdev->output.active = false; gth->output[port].output = NULL; + for (master = 0; master < TH_CONFIGURABLE_MASTERS; master++) + if (gth->master[master] == port) + gth->master[master] = -1; spin_unlock(>h->gth_lock); } diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 70ca27e45602..9d9e47eb0842 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1418,7 +1418,8 @@ nr_pages_store(struct device *dev, struct device_attribute *attr, if (!end) break; - len -= end - p; + /* consume the number and the following comma, hence +1 */ + len -= end - p + 1; p = end + 1; } while (len); diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig index 847a39b35307..e0ac75395526 100644 --- a/drivers/hwtracing/stm/Kconfig +++ b/drivers/hwtracing/stm/Kconfig @@ -28,15 +28,4 @@ config STM_SOURCE_CONSOLE If you want to send kernel console messages over STM devices, say Y. -config STM_SOURCE_HEARTBEAT - tristate "Heartbeat over STM devices" - help - This is a kernel space trace source that sends periodic - heartbeat messages to trace hosts over STM devices. It is - also useful for testing stm class drivers and the stm class - framework itself. - - If you want to send heartbeat messages over STM devices, - say Y. - endif diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 129fcf1c06d9..b6cc841de79d 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -68,24 +68,9 @@ static ssize_t channels_show(struct device *dev, static DEVICE_ATTR_RO(channels); -static ssize_t hw_override_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct stm_device *stm = to_stm_device(dev); - int ret; - - ret = sprintf(buf, "%u\n", stm->data->hw_override); - - return ret; -} - -static DEVICE_ATTR_RO(hw_override); - static struct attribute *stm_attrs[] = { &dev_attr_masters.attr, &dev_attr_channels.attr, - &dev_attr_hw_override.attr, NULL, }; @@ -251,6 +236,9 @@ static int find_free_channels(unsigned long *bitmap, unsigned int start, ; if (i == width) return pos; + + /* step over [pos..pos+i) to continue search */ + pos += i; } return -1; @@ -392,19 +380,14 @@ err_free: static int stm_char_release(struct inode *inode, struct file *file) { struct stm_file *stmf = file->private_data; - struct stm_device *stm = stmf->stm; - if (stm->data->unlink) - stm->data->unlink(stm->data, stmf->output.master, - stmf->output.channel); - - stm_output_free(stm, &stmf->output); + stm_output_free(stmf->stm, &stmf->output); /* * matches the stm_char_open()'s * class_find_device() + try_module_get() */ - stm_put_device(stm); + stm_put_device(stmf->stm); kfree(stmf); return 0; @@ -425,8 +408,8 @@ static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width) return ret; } -static ssize_t stm_write(struct stm_data *data, unsigned int master, - unsigned int channel, const char *buf, size_t count) +static void stm_write(struct stm_data *data, unsigned int master, + unsigned int channel, const char *buf, size_t count) { unsigned int flags = STP_PACKET_TIMESTAMPED; const unsigned char *p = buf, nil = 0; @@ -438,14 +421,9 @@ static ssize_t stm_write(struct stm_data *data, unsigned int master, sz = data->packet(data, master, channel, STP_PACKET_DATA, flags, sz, p); flags = 0; - - if (sz < 0) - break; } data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil); - - return pos; } static ssize_t stm_char_write(struct file *file, const char __user *buf, @@ -483,8 +461,8 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf, return -EFAULT; } - count = stm_write(stm->data, stmf->output.master, stmf->output.channel, - kbuf, count); + stm_write(stm->data, stmf->output.master, stmf->output.channel, kbuf, + count); kfree(kbuf); @@ -526,7 +504,7 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) { struct stm_device *stm = stmf->stm; struct stp_policy_id *id; - int ret = -EINVAL; + int ret = -EINVAL, wlimit = 1; u32 size; if (stmf->output.nr_chans) @@ -554,14 +532,18 @@ static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) if (id->__reserved_0 || id->__reserved_1) goto err_free; - if (id->width < 1 || - id->width > PAGE_SIZE / stm->data->sw_mmiosz) + if (stm->data->sw_mmiosz) + wlimit = PAGE_SIZE / stm->data->sw_mmiosz; + + if (id->width < 1 || id->width > wlimit) goto err_free; ret = stm_file_assign(stmf, id->id, id->width); if (ret) goto err_free; + ret = 0; + if (stm->data->link) ret = stm->data->link(stm->data, stmf->output.master, stmf->output.channel); @@ -887,18 +869,8 @@ unlock: spin_unlock(&src->link_lock); spin_unlock(&stm->link_lock); - /* - * Call the unlink callbacks for both source and stm, when we know - * that we have actually performed the unlinking. - */ - if (!ret) { - if (src->data->unlink) - src->data->unlink(src->data); - - if (stm->data->unlink) - stm->data->unlink(stm->data, src->output.master, - src->output.channel); - } + if (!ret && src->data->unlink) + src->data->unlink(src->data); return ret; } @@ -1087,9 +1059,9 @@ int stm_source_write(struct stm_source_data *data, unsigned int chan, stm = srcu_dereference(src->link, &stm_source_srcu); if (stm) - count = stm_write(stm->data, src->output.master, - src->output.channel + chan, - buf, count); + stm_write(stm->data, src->output.master, + src->output.channel + chan, + buf, count); else count = -ENODEV; diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index c335cc7852f9..1c68b05c8649 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -74,8 +74,7 @@ MST_STATUS_ND) #define MST_STATUS_ERR (MST_STATUS_NAK | \ MST_STATUS_AL | \ - MST_STATUS_IP | \ - MST_STATUS_TSS) + MST_STATUS_IP) #define MST_TX_BYTES_XFRD 0x50 #define MST_RX_BYTES_XFRD 0x54 #define SCL_HIGH_PERIOD 0x80 @@ -241,7 +240,7 @@ static int axxia_i2c_empty_rx_fifo(struct axxia_i2c_dev *idev) */ if (c <= 0 || c > I2C_SMBUS_BLOCK_MAX) { idev->msg_err = -EPROTO; - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); complete(&idev->msg_complete); break; } @@ -297,17 +296,7 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) i2c_int_disable(idev, MST_STATUS_TFL); } - if (status & MST_STATUS_SCC) { - /* Stop completed */ - i2c_int_disable(idev, ~0); - complete(&idev->msg_complete); - } else if (status & MST_STATUS_SNS) { - /* Transfer done */ - i2c_int_disable(idev, ~0); - if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) - axxia_i2c_empty_rx_fifo(idev); - complete(&idev->msg_complete); - } else if (unlikely(status & MST_STATUS_ERR)) { + if (unlikely(status & MST_STATUS_ERR)) { /* Transfer error */ i2c_int_disable(idev, ~0); if (status & MST_STATUS_AL) @@ -324,6 +313,21 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) readl(idev->base + MST_TX_BYTES_XFRD), readl(idev->base + MST_TX_XFER)); complete(&idev->msg_complete); + } else if (status & MST_STATUS_SCC) { + /* Stop completed */ + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); + } else if (status & MST_STATUS_SNS) { + /* Transfer done */ + i2c_int_disable(idev, ~MST_STATUS_TSS); + if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) + axxia_i2c_empty_rx_fifo(idev); + complete(&idev->msg_complete); + } else if (status & MST_STATUS_TSS) { + /* Transfer timeout */ + idev->msg_err = -ETIMEDOUT; + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); } out: @@ -339,10 +343,10 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) u32 rx_xfer, tx_xfer; u32 addr_1, addr_2; unsigned long time_left; + unsigned int wt_value; idev->msg = msg; idev->msg_xfrd = 0; - idev->msg_err = 0; reinit_completion(&idev->msg_complete); if (i2c_m_ten(msg)) { @@ -382,9 +386,18 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) else if (axxia_i2c_fill_tx_fifo(idev) != 0) int_mask |= MST_STATUS_TFL; + wt_value = WT_VALUE(readl(idev->base + WAIT_TIMER_CONTROL)); + /* Disable wait timer temporarly */ + writel(wt_value, idev->base + WAIT_TIMER_CONTROL); + /* Check if timeout error happened */ + if (idev->msg_err) + goto out; + /* Start manual mode */ writel(CMD_MANUAL, idev->base + MST_COMMAND); + writel(WT_EN | wt_value, idev->base + WAIT_TIMER_CONTROL); + i2c_int_enable(idev, int_mask); time_left = wait_for_completion_timeout(&idev->msg_complete, @@ -395,13 +408,15 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) if (readl(idev->base + MST_COMMAND) & CMD_BUSY) dev_warn(idev->dev, "busy after xfer\n"); - if (time_left == 0) + if (time_left == 0) { idev->msg_err = -ETIMEDOUT; - - if (idev->msg_err == -ETIMEDOUT) i2c_recover_bus(&idev->adapter); + axxia_i2c_init(idev); + } - if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO) +out: + if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO && + idev->msg_err != -ETIMEDOUT) axxia_i2c_init(idev); return idev->msg_err; @@ -409,7 +424,7 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) static int axxia_i2c_stop(struct axxia_i2c_dev *idev) { - u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC; + u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC | MST_STATUS_TSS; unsigned long time_left; reinit_completion(&idev->msg_complete); @@ -436,6 +451,9 @@ axxia_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) int i; int ret = 0; + idev->msg_err = 0; + i2c_int_enable(idev, MST_STATUS_TSS); + for (i = 0; ret == 0 && i < num; ++i) ret = axxia_i2c_xfer_msg(idev, &msgs[i]); diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 84deed6571bd..6d32e6da3110 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -378,8 +378,10 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->recv_count > CDNS_I2C_FIFO_DEPTH) + if ((id->recv_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); @@ -436,8 +438,11 @@ static void cdns_i2c_msend(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->send_count > CDNS_I2C_FIFO_DEPTH) + if ((id->send_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; + cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); /* Clear the interrupts in interrupt status register. */ diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index efefcfa24a4c..d2178f701b41 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -364,6 +364,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) { struct acpi_smbus_cmi *smbus_cmi; const struct acpi_device_id *id; + int ret; smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL); if (!smbus_cmi) @@ -385,8 +386,10 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); - if (smbus_cmi->cap_info == 0) + if (smbus_cmi->cap_info == 0) { + ret = -ENODEV; goto err; + } snprintf(smbus_cmi->adapter.name, sizeof(smbus_cmi->adapter.name), "SMBus CMI adapter %s", @@ -397,7 +400,8 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus_cmi->adapter.dev.parent = &device->dev; - if (i2c_add_adapter(&smbus_cmi->adapter)) { + ret = i2c_add_adapter(&smbus_cmi->adapter); + if (ret) { dev_err(&device->dev, "Couldn't register adapter!\n"); goto err; } @@ -407,7 +411,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) err: kfree(smbus_cmi); device->driver_data = NULL; - return -EIO; + return ret; } static int acpi_smbus_cmi_remove(struct acpi_device *device) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index a0522fcc4ff8..1004422dbb10 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -696,7 +696,7 @@ static const struct i2c_algorithm tegra_i2c_algo = { /* payload size is only 12 bit */ static struct i2c_adapter_quirks tegra_i2c_quirks = { .max_read_len = 4096, - .max_write_len = 4096, + .max_write_len = 4096 - 12, }; static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 011b50caea28..9b2ee397894c 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -3021,16 +3021,16 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, the underlying bus driver */ break; case I2C_SMBUS_I2C_BLOCK_DATA: + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { + dev_err(&adapter->dev, "Invalid block %s size %d\n", + read_write == I2C_SMBUS_READ ? "read" : "write", + data->block[0]); + return -EINVAL; + } if (read_write == I2C_SMBUS_READ) { msg[1].len = data->block[0]; } else { msg[0].len = data->block[0] + 1; - if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) { - dev_err(&adapter->dev, - "Invalid block write size %d\n", - data->block[0]); - return -EINVAL; - } for (i = 1; i <= data->block[0]; i++) msgbuf0[i] = data->block[i]; } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 94c837046786..57e3790c87b1 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -459,9 +459,15 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return i2cdev_ioctl_smbus(client, arg); case I2C_RETRIES: + if (arg > INT_MAX) + return -EINVAL; + client->adapter->retries = arg; break; case I2C_TIMEOUT: + if (arg > INT_MAX) + return -EINVAL; + /* For historical reasons, user-space sets the timeout * value in units of 10 ms. */ diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 96a345248224..0add5bb3cee8 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -920,6 +920,7 @@ static u8 pmac_ide_cable_detect(ide_hwif_t *hwif) struct device_node *root = of_find_node_by_path("/"); const char *model = of_get_property(root, "model", NULL); + of_node_put(root); /* Get cable type from device-tree. */ if (cable && !strncmp(cable, "80-", 3)) { /* Some drives fail to detect 80c cable in PowerBook */ diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 18c1b06684c1..0667b2875ee4 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1343,6 +1343,8 @@ static int kxcjk1013_resume(struct device *dev) mutex_lock(&data->mutex); ret = kxcjk1013_set_mode(data, OPERATION); + if (ret == 0) + ret = kxcjk1013_set_range(data, data->range); mutex_unlock(&data->mutex); return ret; diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 22c4c17cd996..a1d072ecb717 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, if (sigma_delta->info->has_registers) { data[0] = reg << sigma_delta->info->addr_shift; data[0] |= sigma_delta->info->read_mask; + data[0] |= sigma_delta->comm; spi_message_add_tail(&t[0], &m); } spi_message_add_tail(&t[1], &m); diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index d83e5b75a37b..4b317ffd144c 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -702,23 +702,29 @@ static int at91_adc_read_raw(struct iio_dev *idev, ret = wait_event_interruptible_timeout(st->wq_data_avail, st->done, msecs_to_jiffies(1000)); - if (ret == 0) - ret = -ETIMEDOUT; - if (ret < 0) { - mutex_unlock(&st->lock); - return ret; - } - - *val = st->last_value; + /* Disable interrupts, regardless if adc conversion was + * successful or not + */ at91_adc_writel(st, AT91_ADC_CHDR, AT91_ADC_CH(chan->channel)); at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel)); - st->last_value = 0; - st->done = false; + if (ret > 0) { + /* a valid conversion took place */ + *val = st->last_value; + st->last_value = 0; + st->done = false; + ret = IIO_VAL_INT; + } else if (ret == 0) { + /* conversion timeout */ + dev_err(&idev->dev, "ADC Channel %d timeout.\n", + chan->channel); + ret = -ETIMEDOUT; + } + mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 90841abd3ce4..a4dc6a3783d0 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -519,11 +519,10 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, } else return -EINVAL; case IIO_CHAN_INFO_SCALE: - *val = 0; switch (chan->type) { case IIO_TEMP: - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 500; + return IIO_VAL_INT; case IIO_ANGL_VEL: { int i; @@ -531,6 +530,7 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) { if (bmg160_scale_table[i].dps_range == data->dps_range) { + *val = 0; *val2 = bmg160_scale_table[i].scale; return IIO_VAL_INT_PLUS_MICRO; } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 98fd9a594841..8762eac47570 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -95,6 +95,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DEFINE_DMA_ATTRS(attrs); struct scatterlist *sg, *sg_list_start; int need_release = 0; + unsigned int gup_flags = FOLL_WRITE; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -177,6 +178,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret) goto out; + if (!umem->writable) + gup_flags |= FOLL_FORCE; + need_release = 1; sg_list_start = umem->sg_head.sgl; @@ -184,7 +188,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, vma_list); + gup_flags, page_list, vma_list); if (ret < 0) goto out; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 40becdb3196e..738ccfee7cae 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; + unsigned int flags = 0; if (access_mask == 0) return -EINVAL; @@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, goto out_put_task; } + if (access_mask & ODP_WRITE_ALLOWED_BIT) + flags |= FOLL_WRITE; + start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; @@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages(owning_process, owning_mm, user_virt, gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, 0, - local_page_list, NULL); + flags, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 21cb41a60fe8..3a70b418d913 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -805,8 +805,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 39a488889fc7..5dc920fe1326 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -39,7 +39,7 @@ #include "mlx4_ib.h" -#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ) +#define CM_CLEANUP_CACHE_TIMEOUT (30 * HZ) struct id_map_entry { struct rb_node node; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 7d2e42dd6926..8676685dbf3d 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -472,8 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, goto out; } - ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, - pages, NULL); + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, + FOLL_WRITE, pages, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 59193f67ea78..56bd59bc08b5 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -515,7 +515,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index ab1588ae1c85..75c3f0dffe63 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -68,7 +68,8 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 645a5f6e6c88..7f0d75e29441 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -113,6 +113,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int flags; dma_addr_t pa; DEFINE_DMA_ATTRS(attrs); + unsigned int gup_flags; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -140,6 +141,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, flags = IOMMU_READ | IOMMU_CACHE; flags |= (writable) ? IOMMU_WRITE : 0; + gup_flags = FOLL_WRITE; + gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; ret = 0; @@ -147,7 +150,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - 1, !writable, page_list, NULL); + gup_flags, page_list, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1897c4080346..3dbc3ed263c2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2594,7 +2594,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2606,15 +2605,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) if (status) return FAILED; - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - for (j = 0; j < target->req_ring_size; ++j) { - struct srp_request *req = &ch->req_ring[j]; - - srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); - } - } - return SUCCESS; } diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f55dcdf99bc5..26476a64e663 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -255,6 +255,8 @@ static const struct xpad_device { { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, + { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -431,6 +433,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f X-Box One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ + XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index c64d87442a62..2e12e31f45c5 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -220,7 +220,7 @@ static void matrix_keypad_stop(struct input_dev *dev) keypad->stopped = true; spin_unlock_irq(&keypad->lock); - flush_work(&keypad->work.work); + flush_delayed_work(&keypad->work); /* * matrix_keypad_scan() will leave IRQs enabled; * we should disable them now. diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 6639b2b8528a..3d2c60c8de83 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -60,8 +60,18 @@ /* OMAP4 values */ #define OMAP4_VAL_IRQDISABLE 0x0 -#define OMAP4_VAL_DEBOUNCINGTIME 0x7 -#define OMAP4_VAL_PVT 0x7 + +/* + * Errata i689: If a key is released for a time shorter than debounce time, + * the keyboard will idle and never detect the key release. The workaround + * is to use at least a 12ms debounce time. See omap5432 TRM chapter + * "26.4.6.2 Keyboard Controller Timer" for more information. + */ +#define OMAP4_KEYPAD_PTV_DIV_128 0x6 +#define OMAP4_KEYPAD_DEBOUNCINGTIME_MS(dbms, ptv) \ + ((((dbms) * 1000) / ((1 << ((ptv) + 1)) * (1000000 / 32768))) - 1) +#define OMAP4_VAL_DEBOUNCINGTIME_16MS \ + OMAP4_KEYPAD_DEBOUNCINGTIME_MS(16, OMAP4_KEYPAD_PTV_DIV_128) enum { KBD_REVISION_OMAP4 = 0, @@ -116,12 +126,8 @@ static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id) { struct omap4_keypad *keypad_data = dev_id; - if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) { - /* Disable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_VAL_IRQDISABLE); + if (kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)) return IRQ_WAKE_THREAD; - } return IRQ_NONE; } @@ -163,11 +169,6 @@ static irqreturn_t omap4_keypad_irq_thread_fn(int irq, void *dev_id) kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); - /* enable interrupts */ - kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, - OMAP4_DEF_IRQENABLE_EVENTEN | - OMAP4_DEF_IRQENABLE_LONGKEY); - return IRQ_HANDLED; } @@ -181,9 +182,9 @@ static int omap4_keypad_open(struct input_dev *input) kbd_writel(keypad_data, OMAP4_KBD_CTRL, OMAP4_DEF_CTRL_NOSOFTMODE | - (OMAP4_VAL_PVT << OMAP4_DEF_CTRL_PTV_SHIFT)); + (OMAP4_KEYPAD_PTV_DIV_128 << OMAP4_DEF_CTRL_PTV_SHIFT)); kbd_writel(keypad_data, OMAP4_KBD_DEBOUNCINGTIME, - OMAP4_VAL_DEBOUNCINGTIME); + OMAP4_VAL_DEBOUNCINGTIME_16MS); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); @@ -204,9 +205,10 @@ static void omap4_keypad_close(struct input_dev *input) disable_irq(keypad_data->irq); - /* Disable interrupts */ + /* Disable interrupts and wake-up events */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE, OMAP4_VAL_IRQDISABLE); + kbd_writel(keypad_data, OMAP4_KBD_WAKEUPENABLE, 0); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, @@ -354,7 +356,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) } error = request_threaded_irq(keypad_data->irq, omap4_keypad_irq_handler, - omap4_keypad_irq_thread_fn, 0, + omap4_keypad_irq_thread_fn, IRQF_ONESHOT, "omap4-keypad", keypad_data); if (error) { dev_err(&pdev->dev, "failed to register interrupt\n"); diff --git a/drivers/input/keyboard/st-keyscan.c b/drivers/input/keyboard/st-keyscan.c index de7be4f03d91..ebf9f643d910 100644 --- a/drivers/input/keyboard/st-keyscan.c +++ b/drivers/input/keyboard/st-keyscan.c @@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev) input_dev->id.bustype = BUS_HOST; + keypad_data->input_dev = input_dev; + error = keypad_matrix_key_parse_dt(keypad_data); if (error) return error; @@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad_data); - keypad_data->input_dev = input_dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); keypad_data->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(keypad_data->base)) diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c index 1d0e61d7c131..b6c1d1d482c1 100644 --- a/drivers/input/misc/bma150.c +++ b/drivers/input/misc/bma150.c @@ -482,13 +482,14 @@ static int bma150_register_input_device(struct bma150_data *bma150) idev->close = bma150_irq_close; input_set_drvdata(idev, bma150); + bma150->input = idev; + error = input_register_device(idev); if (error) { input_free_device(idev); return error; } - bma150->input = idev; return 0; } @@ -511,15 +512,15 @@ static int bma150_register_polled_device(struct bma150_data *bma150) bma150_init_input_device(bma150, ipoll_dev->input); + bma150->input_polled = ipoll_dev; + bma150->input = ipoll_dev->input; + error = input_register_polled_device(ipoll_dev); if (error) { input_free_polled_device(ipoll_dev); return error; } - bma150->input_polled = ipoll_dev; - bma150->input = ipoll_dev->input; - return 0; } diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 471984ec2db0..16f5d5660053 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1241,6 +1241,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0601", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, @@ -1250,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN060C", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0617", 0 }, { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 84aead19622c..4c1e527f14a5 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1121,6 +1121,8 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus UX31 0x361f00 20, 15, 0e clickpad * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad + * Fujitsu CELSIUS H760 0x570f02 40, 14, 0c 3 hw buttons (**) + * Fujitsu CELSIUS H780 0x5d0f02 41, 16, 0d 3 hw buttons (**) * Fujitsu LIFEBOOK E544 0x470f00 d0, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E546 0x470f00 50, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E547 0x470f00 50, 12, 09 2 hw buttons @@ -1173,6 +1175,13 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H760"), }, }, + { + /* Fujitsu H780 also has a middle button */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H780"), + }, + }, #endif { } }; diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c index 20ab802461e7..1d46b763aae6 100644 --- a/drivers/input/tablet/wacom_serial4.c +++ b/drivers/input/tablet/wacom_serial4.c @@ -187,6 +187,7 @@ enum { MODEL_DIGITIZER_II = 0x5544, /* UD */ MODEL_GRAPHIRE = 0x4554, /* ET */ MODEL_PENPARTNER = 0x4354, /* CT */ + MODEL_ARTPAD_II = 0x4B54, /* KT */ }; static void wacom_handle_model_response(struct wacom *wacom) @@ -245,6 +246,7 @@ static void wacom_handle_model_response(struct wacom *wacom) wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL; break; + case MODEL_ARTPAD_II: case MODEL_DIGITIZER_II: wacom->dev->name = "Wacom Digitizer II"; wacom->dev->id.version = MODEL_DIGITIZER_II; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 52c36394dba5..0ad8b7c78a43 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1982,6 +1982,7 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias; @@ -1997,10 +1998,6 @@ static void do_detach(struct iommu_dev_data *dev_data) iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -2010,6 +2007,16 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; } /* diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index fc6eb752ab35..eb9937225d64 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -683,7 +683,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q) u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); - writel(q->cons, q->cons_reg); + + /* + * Ensure that all CPU accesses (reads and writes) to the queue + * are complete before we update the cons pointer. + */ + mb(); + writel_relaxed(q->cons, q->cons_reg); } static int queue_sync_prod(struct arm_smmu_queue *q) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 5a63e32a4a6b..cbad1926cec1 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -143,7 +143,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) for (tmp = dev; tmp; tmp = tmp->bus->self) level++; - size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); + size = sizeof(*info) + level * sizeof(info->path[0]); if (size <= sizeof(dmar_pci_notify_info_buf)) { info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; } else { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7feaa82f8c7c..3e97c4b2ebed 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1598,6 +1598,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; + if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; @@ -2041,7 +2044,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * than default. Unnecessary for PT mode. */ if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { ret = -ENOMEM; pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) @@ -2055,7 +2058,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, iommu->agaw); + context_set_address_width(context, agaw); } else { /* * In pass through mode, AW must be programmed to diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c3d7a1461043..114d5883d497 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1230,13 +1230,14 @@ static void its_free_device(struct its_device *its_dev) kfree(its_dev); } -static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) +static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->event_map.lpi_map, - dev->event_map.nr_lpis); - if (idx == dev->event_map.nr_lpis) + idx = bitmap_find_free_region(dev->event_map.lpi_map, + dev->event_map.nr_lpis, + get_count_order(nvecs)); + if (idx < 0) return -ENOSPC; *hwirq = dev->event_map.lpi_base + idx; @@ -1317,20 +1318,20 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, int err; int i; - for (i = 0; i < nr_irqs; i++) { - err = its_alloc_device_irq(its_dev, &hwirq); - if (err) - return err; + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); + if (err) + return err; - err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); + for (i = 0; i < nr_irqs; i++) { + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); if (err) return err; irq_domain_set_hwirq_and_chip(domain, virq + i, - hwirq, &its_irq_chip, its_dev); + hwirq + i, &its_irq_chip, its_dev); pr_debug("ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->event_map.lpi_base), - (int) hwirq, virq + i); + (int)(hwirq + i - its_dev->event_map.lpi_base), + (int)(hwirq + i), virq + i); } return 0; diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 013fc9659a84..2fe2bcb63a71 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f +#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static struct mmp_intc_conf mmp_conf = { static struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, }; static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index dd7e38ac29bd..d15347de415a 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -851,7 +851,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) u16 ret; if (contr == 0) { - strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); + strncpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN); return CAPI_NOERROR; } @@ -859,7 +859,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) ctr = get_capi_ctr_by_nr(contr); if (ctr && ctr->state == CAPI_CTR_RUNNING) { - strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); + strncpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN); ret = CAPI_NOERROR; } else ret = CAPI_REGNOTINSTALLED; diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index 4d9b195547c5..df2a10157720 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -423,7 +423,7 @@ void b1_parse_version(avmctrl_info *cinfo) int i, j; for (j = 0; j < AVM_MAXVERSION; j++) - cinfo->version[j] = "\0\0" + 1; + cinfo->version[j] = ""; for (i = 0, j = 0; j < AVM_MAXVERSION && i < cinfo->versionlen; j++, i += cinfo->versionbuf[i] + 1) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 28543d795188..9a27809bdaf2 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4370,7 +4370,8 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, if (m->clock2) test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); - if (ent->device == 0xB410) { + if (ent->vendor == PCI_VENDOR_ID_DIGIUM && + ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) { test_and_set_bit(HFC_CHIP_B410P, &hc->chip); test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 90449e1e91e5..1b1453d62fed 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -1169,11 +1169,13 @@ HFCPCI_l1hw(struct PStack *st, int pr, void *arg) if (cs->debug & L1_DEB_LAPD) debugl1(cs, "-> PH_REQUEST_PULL"); #endif + spin_lock_irqsave(&cs->lock, flags); if (!cs->tx_skb) { test_and_clear_bit(FLG_L1_PULL_REQ, &st->l1.Flags); st->l1.l1l2(st, PH_PULL | CONFIRM, NULL); } else test_and_set_bit(FLG_L1_PULL_REQ, &st->l1.Flags); + spin_unlock_irqrestore(&cs->lock, flags); break; case (HW_RESET | REQUEST): spin_lock_irqsave(&cs->lock, flags); diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 2175225af742..2da3f5cd0729 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -786,7 +786,7 @@ isdn_tty_suspend(char *id, modem_info *info, atemu *m) cmd.parm.cmsg.para[3] = 4; /* 16 bit 0x0004 Suspend */ cmd.parm.cmsg.para[4] = 0; cmd.parm.cmsg.para[5] = l; - strncpy(&cmd.parm.cmsg.para[6], id, l); + strscpy(&cmd.parm.cmsg.para[6], id, l); cmd.command = CAPI_PUT_MESSAGE; cmd.driver = info->isdn_driver; cmd.arg = info->isdn_channel; @@ -1459,15 +1459,19 @@ isdn_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { modem_info *info = (modem_info *) tty->driver_data; + mutex_lock(&modem_info_mutex); if (!old_termios) isdn_tty_change_speed(info); else { if (tty->termios.c_cflag == old_termios->c_cflag && tty->termios.c_ispeed == old_termios->c_ispeed && - tty->termios.c_ospeed == old_termios->c_ospeed) + tty->termios.c_ospeed == old_termios->c_ospeed) { + mutex_unlock(&modem_info_mutex); return; + } isdn_tty_change_speed(info); } + mutex_unlock(&modem_info_mutex); } /* diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 9438d7ec3308..8b29e97cf668 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -168,8 +168,8 @@ dev_expire_timer(unsigned long data) spin_lock_irqsave(&timer->dev->lock, flags); if (timer->id >= 0) list_move_tail(&timer->list, &timer->dev->expired); - spin_unlock_irqrestore(&timer->dev->lock, flags); wake_up_interruptible(&timer->dev->wait); + spin_unlock_irqrestore(&timer->dev->lock, flags); } static int diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 1d0187f42941..d12370352ae3 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -318,7 +318,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip) /* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - lp55xx_read(chip, LP5523_REG_STATUS, &status); + ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); + if (ret) + return ret; status &= LP5523_ENG_STATUS_MASK; if (status != LP5523_ENG_STATUS_MASK) { diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index 59b76833f0d3..fd077c176a62 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -200,7 +200,7 @@ static void lp55xx_firmware_loaded(const struct firmware *fw, void *context) if (!fw) { dev_err(dev, "firmware request failed\n"); - goto out; + return; } /* handling firmware data is chip dependent */ @@ -213,9 +213,9 @@ static void lp55xx_firmware_loaded(const struct firmware *fw, void *context) mutex_unlock(&chip->lock); -out: /* firmware should be released for other channel use */ release_firmware(chip->fw); + chip->fw = NULL; } static int lp55xx_request_firmware(struct lp55xx_chip *chip) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 95de7ee56dfc..44b2db61859a 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -459,21 +459,6 @@ config DM_VERITY If unsure, say N. -config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 - bool "Prefetch size 128" - -config DM_VERITY_HASH_PREFETCH_MIN_SIZE - int "Verity hash prefetch minimum size" - depends on DM_VERITY - range 1 4096 - default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 - default 1 - ---help--- - This sets minimum number of hash blocks to prefetch for dm-verity. - For devices like eMMC, having larger prefetch size like 128 can improve - performance with increased memory consumption for keeping more hashes - in RAM. - config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -537,7 +522,6 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR=y - select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 5a5c1f1bd8a5..463ce6757338 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -215,7 +215,9 @@ STORE(__cached_dev) d_strtoul(writeback_rate_d_term); d_strtoul_nonzero(writeback_rate_p_term_inverse); - d_strtoi_h(sequential_cutoff); + sysfs_strtoul_clamp(sequential_cutoff, + dc->sequential_cutoff, + 0, UINT_MAX); d_strtoi_h(readahead); if (attr == &sysfs_clear_stats) @@ -645,8 +647,17 @@ STORE(__bch_cache_set) c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT; /* See count_io_errors() for why 88 */ - if (attr == &sysfs_io_error_halflife) - c->error_decay = strtoul_or_return(buf) / 88; + if (attr == &sysfs_io_error_halflife) { + unsigned long v = 0; + ssize_t ret; + + ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX); + if (!ret) { + c->error_decay = v / 88; + return size; + } + return ret; + } sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); sysfs_strtoul(verify, c->verify); diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 0526fe92a683..e7a3c12aa66f 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -80,9 +80,16 @@ do { \ #define sysfs_strtoul_clamp(file, var, min, max) \ do { \ - if (attr == &sysfs_ ## file) \ - return strtoul_safe_clamp(buf, var, min, max) \ - ?: (ssize_t) size; \ + if (attr == &sysfs_ ## file) { \ + unsigned long v = 0; \ + ssize_t ret; \ + ret = strtoul_safe_clamp(buf, v, min, max); \ + if (!ret) { \ + var = v; \ + return size; \ + } \ + return ret; \ + } \ } while (0) #define strtoul_or_return(cp) \ diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 54c308e6704f..04248394843e 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -55,15 +55,17 @@ struct dm_kcopyd_client { struct dm_kcopyd_throttle *throttle; /* - * We maintain three lists of jobs: + * We maintain four lists of jobs: * * i) jobs waiting for pages * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. + * iii) jobs that don't need to do any IO and just run a callback + * iv) jobs that have completed. * - * All three of these are protected by job_lock. + * All four of these are protected by job_lock. */ spinlock_t job_lock; + struct list_head callback_jobs; struct list_head complete_jobs; struct list_head io_jobs; struct list_head pages_jobs; @@ -583,6 +585,7 @@ static void do_work(struct work_struct *work) struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client, kcopyd_work); struct blk_plug plug; + unsigned long flags; /* * The order that these are called is *very* important. @@ -591,6 +594,10 @@ static void do_work(struct work_struct *work) * list. io jobs call wake when they complete and it all * starts again. */ + spin_lock_irqsave(&kc->job_lock, flags); + list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); + blk_start_plug(&plug); process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->pages_jobs, kc, run_pages_job); @@ -608,7 +615,7 @@ static void dispatch_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; atomic_inc(&kc->nr_jobs); if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); else if (job->pages == &zero_page_list) push(&kc->io_jobs, job); else @@ -795,7 +802,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) job->read_err = read_err; job->write_err = write_err; - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); wake(kc); } EXPORT_SYMBOL(dm_kcopyd_do_callback); @@ -825,6 +832,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->callback_jobs); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); @@ -874,6 +882,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->callback_jobs)); BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->pages_jobs)); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index e4d1bafe78c1..2a855e5429ab 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dm.h" @@ -105,6 +106,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; + /* Maximum number of in-flight COW jobs. */ + struct semaphore cow_count; + struct dm_kcopyd_client *kcopyd_client; /* Wait for events based on state_bits */ @@ -145,6 +149,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -1189,6 +1206,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } + sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1560,6 +1579,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) } list_add(&pe->out_of_order_entry, lh); } + up(&s->cow_count); } /* @@ -1583,6 +1603,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ + down(&s->cow_count); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1602,6 +1623,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; + down(&s->cow_count); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 265964e63582..9f021b4532b3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1552,6 +1552,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, smp_mb(); if (dm_table_request_based(t)) queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); + + /* io_pages is used for readahead */ + q->backing_dev_info.io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index bc4e6825ff62..d52ea584e0bc 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -256,6 +256,7 @@ struct pool { spinlock_t lock; struct bio_list deferred_flush_bios; + struct bio_list deferred_flush_completions; struct list_head prepared_mappings; struct list_head prepared_discards; struct list_head active_thins; @@ -920,6 +921,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) mempool_free(m, m->tc->pool->mapping_pool); } +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + /* + * If the bio has the REQ_FUA flag set we must commit the metadata + * before signaling its completion. + */ + if (!bio_triggers_commit(tc, bio)) { + bio_endio(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to the + * metadata that can't be committed, e.g, due to I/O errors on the + * metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_completions, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -952,7 +986,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio); + complete_overwrite_bio(tc, bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -2228,7 +2262,7 @@ static void process_deferred_bios(struct pool *pool) { unsigned long flags; struct bio *bio; - struct bio_list bios; + struct bio_list bios, bio_completions; struct thin_c *tc; tc = get_first_thin(pool); @@ -2239,26 +2273,36 @@ static void process_deferred_bios(struct pool *pool) } /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. + * If there are any deferred flush bios, we must commit the metadata + * before issuing them or signaling their completion. */ bio_list_init(&bios); + bio_list_init(&bio_completions); + spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&bios, &pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios); + + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); + bio_list_init(&pool->deferred_flush_completions); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { + bio_list_merge(&bios, &bio_completions); + while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; } pool->last_commit_jiffies = jiffies; + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -2885,6 +2929,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); + bio_list_init(&pool->deferred_flush_completions); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); INIT_LIST_HEAD(&pool->active_thins); @@ -3165,6 +3210,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; + /* make sure metadata and data are different devices */ + if (!strcmp(argv[0], argv[1])) { + ti->error = "Error setting metadata or data device"; + r = -EINVAL; + goto out_unlock; + } + /* * Set default pool features. */ @@ -4047,6 +4099,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) tc->sort_bio_list = RB_ROOT; if (argc == 3) { + if (!strcmp(argv[0], argv[2])) { + ti->error = "Error setting origin device"; + r = -EINVAL; + goto bad_origin_dev; + } + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); if (r) { ti->error = "Error opening origin device"; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d2e3abc182b3..131077aabd08 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -529,7 +529,6 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; - sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -552,14 +551,8 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: - // for emmc, it is more efficient to send bigger read - prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, - hash_block_end - hash_block_start + 1); - if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { - prefetch_size = hash_block_end - hash_block_start + 1; - } dm_bufio_prefetch(v->bufio, hash_block_start, - prefetch_size); + hash_block_end - hash_block_start + 1); } kfree(pw); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8d613652d0e2..69e9abf00c74 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3755,6 +3755,8 @@ static int run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto out_free_conf; } return 0; @@ -4442,7 +4444,6 @@ bio_full: atomic_inc(&r10_bio->remaining); read_bio->bi_next = NULL; generic_make_request(read_bio); - sector_nr += nr_sectors; sectors_done += nr_sectors; if (sector_nr <= last) goto read_more; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0841d8f10a58..5e65dc6def7e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6973,6 +6973,8 @@ static int run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "reshape"); + if (!mddev->sync_thread) + goto abort; } /* Ok, everything is just fine now */ diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index f770f6a2c987..3ea9edc8cdbe 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 107853b0fddd..bde77671a37c 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -241,7 +241,9 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 000606af70f7..f770ab72a8e3 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index cadcae4cff89..ac9d2591bb6f 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -99,8 +99,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index c36e6764eead..c44188271028 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 756650f154ab..ad9b7d4f8d95 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -552,7 +552,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 4ef8a5c7003e..44fac2570034 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index 44cb73f68af6..ddd0d778ad6e 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -804,7 +804,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 25bdf6e0f963..f0377e2b341b 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -761,7 +761,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index e66154e5c1d7..45d14869e7b8 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index ee09ec26c553..b273e4fd8024 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -138,7 +138,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 251a556112a9..280b5ffea592 100644 --- a/drivers/media/firewire/firedtv-avc.c +++ b/drivers/media/firewire/firedtv-avc.c @@ -968,7 +968,8 @@ static int get_ca_object_length(struct avc_response_frame *r) return r->operand[7]; } -int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; @@ -1009,7 +1010,8 @@ out: return ret; } -int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len) +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len) { struct avc_command_frame *c = (void *)fdtv->avc_data; struct avc_response_frame *r = (void *)fdtv->avc_data; diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h index 345d1eda8c05..5b18a08c6285 100644 --- a/drivers/media/firewire/firedtv.h +++ b/drivers/media/firewire/firedtv.h @@ -124,8 +124,10 @@ int avc_lnb_control(struct firedtv *fdtv, char voltage, char burst, struct dvb_diseqc_master_cmd *diseqcmd); void avc_remote_ctrl_work(struct work_struct *work); int avc_register_remote_control(struct firedtv *fdtv); -int avc_ca_app_info(struct firedtv *fdtv, char *app_info, unsigned int *len); -int avc_ca_info(struct firedtv *fdtv, char *app_info, unsigned int *len); +int avc_ca_app_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); +int avc_ca_info(struct firedtv *fdtv, unsigned char *app_info, + unsigned int *len); int avc_ca_reset(struct firedtv *fdtv); int avc_ca_pmt(struct firedtv *fdtv, char *app_info, int length); int avc_ca_get_time_date(struct firedtv *fdtv, int *interval); diff --git a/drivers/media/i2c/soc_camera/mt9m111.c b/drivers/media/i2c/soc_camera/mt9m111.c index 6dfaead6aaa8..1d1ca03c797f 100644 --- a/drivers/media/i2c/soc_camera/mt9m111.c +++ b/drivers/media/i2c/soc_camera/mt9m111.c @@ -988,6 +988,8 @@ static int mt9m111_probe(struct i2c_client *client, mt9m111->rect.top = MT9M111_MIN_DARK_ROWS; mt9m111->rect.width = MT9M111_MAX_WIDTH; mt9m111->rect.height = MT9M111_MAX_HEIGHT; + mt9m111->width = mt9m111->rect.width; + mt9m111->height = mt9m111->rect.height; mt9m111->fmt = &mt9m111_colour_fmts[0]; mt9m111->lastpage = -1; mutex_init(&mt9m111->power_lock); diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index 24152accc66c..8729fdebef8f 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -125,7 +125,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, /* Get user pages for DMA Xfer */ err = get_user_pages_unlocked(current, current->mm, - user_dma.uaddr, user_dma.page_count, 0, 1, dma->map); + user_dma.uaddr, user_dma.page_count, dma->map, + FOLL_FORCE); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index 2b8e7b2f2b86..9cd995f418e0 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -76,13 +76,13 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma, /* Get user pages for DMA Xfer */ y_pages = get_user_pages_unlocked(current, current->mm, - y_dma.uaddr, y_dma.page_count, 0, 1, - &dma->map[0]); + y_dma.uaddr, y_dma.page_count, + &dma->map[0], FOLL_FORCE); uv_pages = 0; /* silence gcc. value is set and consumed only if: */ if (y_pages == y_dma.page_count) { uv_pages = get_user_pages_unlocked(current, current->mm, - uv_dma.uaddr, uv_dma.page_count, 0, 1, - &dma->map[y_pages]); + uv_dma.uaddr, uv_dma.page_count, + &dma->map[y_pages], FOLL_FORCE); } if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) { diff --git a/drivers/media/platform/davinci/vpbe.c b/drivers/media/platform/davinci/vpbe.c index 9a6c2cc38acb..abce9c4a1a8e 100644 --- a/drivers/media/platform/davinci/vpbe.c +++ b/drivers/media/platform/davinci/vpbe.c @@ -753,7 +753,7 @@ static int vpbe_initialize(struct device *dev, struct vpbe_device *vpbe_dev) if (ret) { v4l2_err(&vpbe_dev->v4l2_dev, "Failed to set default output %s", def_output); - return ret; + goto fail_kfree_amp; } printk(KERN_NOTICE "Setting default mode to %s\n", def_mode); @@ -761,12 +761,15 @@ static int vpbe_initialize(struct device *dev, struct vpbe_device *vpbe_dev) if (ret) { v4l2_err(&vpbe_dev->v4l2_dev, "Failed to set default mode %s", def_mode); - return ret; + goto fail_kfree_amp; } vpbe_dev->initialized = 1; /* TBD handling of bootargs for default output and mode */ return 0; +fail_kfree_amp: + mutex_lock(&vpbe_dev->lock); + kfree(vpbe_dev->amp); fail_kfree_encoders: kfree(vpbe_dev->encoders); fail_dev_unregister: diff --git a/drivers/media/platform/mx2_emmaprp.c b/drivers/media/platform/mx2_emmaprp.c index 03a1b606655d..009a4bb77d05 100644 --- a/drivers/media/platform/mx2_emmaprp.c +++ b/drivers/media/platform/mx2_emmaprp.c @@ -289,7 +289,7 @@ static void emmaprp_device_run(void *priv) { struct emmaprp_ctx *ctx = priv; struct emmaprp_q_data *s_q_data, *d_q_data; - struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *src_buf, *dst_buf; struct emmaprp_dev *pcdev = ctx->dev; unsigned int s_width, s_height; unsigned int d_width, d_height; @@ -309,8 +309,8 @@ static void emmaprp_device_run(void *priv) d_height = d_q_data->height; d_size = d_width * d_height; - p_in = vb2_dma_contig_plane_dma_addr(src_buf, 0); - p_out = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + p_in = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + p_out = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); if (!p_in || !p_out) { v4l2_err(&pcdev->v4l2_dev, "Acquiring kernel pointers to buffers failed\n"); diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 70c28d19ea04..596359576109 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp, if (!vec) return -ENOMEM; - ret = get_vaddr_frames(virtp, 1, true, false, vec); + ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec); if (ret != 1) { frame_vector_destroy(vec); return -EINVAL; diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c index e1936d9d27da..2b939555cccb 100644 --- a/drivers/media/platform/s5p-g2d/g2d.c +++ b/drivers/media/platform/s5p-g2d/g2d.c @@ -497,7 +497,7 @@ static void device_run(void *prv) { struct g2d_ctx *ctx = prv; struct g2d_dev *dev = ctx->dev; - struct vb2_buffer *src, *dst; + struct vb2_v4l2_buffer *src, *dst; unsigned long flags; u32 cmd = 0; @@ -512,10 +512,10 @@ static void device_run(void *prv) spin_lock_irqsave(&dev->ctrl_lock, flags); g2d_set_src_size(dev, &ctx->in); - g2d_set_src_addr(dev, vb2_dma_contig_plane_dma_addr(src, 0)); + g2d_set_src_addr(dev, vb2_dma_contig_plane_dma_addr(&src->vb2_buf, 0)); g2d_set_dst_size(dev, &ctx->out); - g2d_set_dst_addr(dev, vb2_dma_contig_plane_dma_addr(dst, 0)); + g2d_set_dst_addr(dev, vb2_dma_contig_plane_dma_addr(&dst->vb2_buf, 0)); g2d_set_rop4(dev, ctx->rop); g2d_set_flip(dev, ctx->flip); diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 9c6fc09b88e0..0d981bbf38bc 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -788,14 +788,14 @@ static void skip(struct s5p_jpeg_buffer *buf, long len); static void exynos4_jpeg_parse_decode_h_tbl(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + struct vb2_v4l2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); struct s5p_jpeg_buffer jpeg_buffer; unsigned int word; int c, x, components; jpeg_buffer.size = 2; /* Ls */ jpeg_buffer.data = - (unsigned long)vb2_plane_vaddr(vb, 0) + ctx->out_q.sos + 2; + (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.sos + 2; jpeg_buffer.curr = 0; word = 0; @@ -825,14 +825,14 @@ static void exynos4_jpeg_parse_decode_h_tbl(struct s5p_jpeg_ctx *ctx) static void exynos4_jpeg_parse_huff_tbl(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + struct vb2_v4l2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); struct s5p_jpeg_buffer jpeg_buffer; unsigned int word; int c, i, n, j; for (j = 0; j < ctx->out_q.dht.n; ++j) { jpeg_buffer.size = ctx->out_q.dht.len[j]; - jpeg_buffer.data = (unsigned long)vb2_plane_vaddr(vb, 0) + + jpeg_buffer.data = (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.dht.marker[j]; jpeg_buffer.curr = 0; @@ -884,13 +884,13 @@ static void exynos4_jpeg_parse_huff_tbl(struct s5p_jpeg_ctx *ctx) static void exynos4_jpeg_parse_decode_q_tbl(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + struct vb2_v4l2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); struct s5p_jpeg_buffer jpeg_buffer; int c, x, components; jpeg_buffer.size = ctx->out_q.sof_len; jpeg_buffer.data = - (unsigned long)vb2_plane_vaddr(vb, 0) + ctx->out_q.sof; + (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.sof; jpeg_buffer.curr = 0; skip(&jpeg_buffer, 5); /* P, Y, X */ @@ -915,14 +915,14 @@ static void exynos4_jpeg_parse_decode_q_tbl(struct s5p_jpeg_ctx *ctx) static void exynos4_jpeg_parse_q_tbl(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + struct vb2_v4l2_buffer *vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); struct s5p_jpeg_buffer jpeg_buffer; unsigned int word; int c, i, j; for (j = 0; j < ctx->out_q.dqt.n; ++j) { jpeg_buffer.size = ctx->out_q.dqt.len[j]; - jpeg_buffer.data = (unsigned long)vb2_plane_vaddr(vb, 0) + + jpeg_buffer.data = (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.dqt.marker[j]; jpeg_buffer.curr = 0; @@ -1262,13 +1262,16 @@ static int s5p_jpeg_querycap(struct file *file, void *priv, return 0; } -static int enum_fmt(struct s5p_jpeg_fmt *sjpeg_formats, int n, +static int enum_fmt(struct s5p_jpeg_ctx *ctx, + struct s5p_jpeg_fmt *sjpeg_formats, int n, struct v4l2_fmtdesc *f, u32 type) { int i, num = 0; + unsigned int fmt_ver_flag = ctx->jpeg->variant->fmt_ver_flag; for (i = 0; i < n; ++i) { - if (sjpeg_formats[i].flags & type) { + if (sjpeg_formats[i].flags & type && + sjpeg_formats[i].flags & fmt_ver_flag) { /* index-th format of type type found ? */ if (num == f->index) break; @@ -1294,11 +1297,11 @@ static int s5p_jpeg_enum_fmt_vid_cap(struct file *file, void *priv, struct s5p_jpeg_ctx *ctx = fh_to_ctx(priv); if (ctx->mode == S5P_JPEG_ENCODE) - return enum_fmt(sjpeg_formats, SJPEG_NUM_FORMATS, f, + return enum_fmt(ctx, sjpeg_formats, SJPEG_NUM_FORMATS, f, SJPEG_FMT_FLAG_ENC_CAPTURE); - return enum_fmt(sjpeg_formats, SJPEG_NUM_FORMATS, f, - SJPEG_FMT_FLAG_DEC_CAPTURE); + return enum_fmt(ctx, sjpeg_formats, SJPEG_NUM_FORMATS, f, + SJPEG_FMT_FLAG_DEC_CAPTURE); } static int s5p_jpeg_enum_fmt_vid_out(struct file *file, void *priv, @@ -1307,11 +1310,11 @@ static int s5p_jpeg_enum_fmt_vid_out(struct file *file, void *priv, struct s5p_jpeg_ctx *ctx = fh_to_ctx(priv); if (ctx->mode == S5P_JPEG_ENCODE) - return enum_fmt(sjpeg_formats, SJPEG_NUM_FORMATS, f, + return enum_fmt(ctx, sjpeg_formats, SJPEG_NUM_FORMATS, f, SJPEG_FMT_FLAG_ENC_OUTPUT); - return enum_fmt(sjpeg_formats, SJPEG_NUM_FORMATS, f, - SJPEG_FMT_FLAG_DEC_OUTPUT); + return enum_fmt(ctx, sjpeg_formats, SJPEG_NUM_FORMATS, f, + SJPEG_FMT_FLAG_DEC_OUTPUT); } static struct s5p_jpeg_q_data *get_q_data(struct s5p_jpeg_ctx *ctx, @@ -2016,15 +2019,15 @@ static void s5p_jpeg_device_run(void *priv) { struct s5p_jpeg_ctx *ctx = priv; struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *src_buf, *dst_buf; unsigned long src_addr, dst_addr, flags; spin_lock_irqsave(&ctx->jpeg->slock, flags); src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); - src_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); - dst_addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + src_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); s5p_jpeg_reset(jpeg->regs); s5p_jpeg_poweron(jpeg->regs); @@ -2097,7 +2100,7 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; struct s5p_jpeg_fmt *fmt; - struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb; struct s5p_jpeg_addr jpeg_addr = {}; u32 pix_size, padding_bytes = 0; @@ -2116,7 +2119,7 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) vb = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); } - jpeg_addr.y = vb2_dma_contig_plane_dma_addr(vb, 0); + jpeg_addr.y = vb2_dma_contig_plane_dma_addr(&vb->vb2_buf, 0); if (fmt->colplanes == 2) { jpeg_addr.cb = jpeg_addr.y + pix_size - padding_bytes; @@ -2134,7 +2137,7 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) static void exynos4_jpeg_set_jpeg_addr(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb; unsigned int jpeg_addr = 0; if (ctx->mode == S5P_JPEG_ENCODE) @@ -2142,7 +2145,7 @@ static void exynos4_jpeg_set_jpeg_addr(struct s5p_jpeg_ctx *ctx) else vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); - jpeg_addr = vb2_dma_contig_plane_dma_addr(vb, 0); + jpeg_addr = vb2_dma_contig_plane_dma_addr(&vb->vb2_buf, 0); if (jpeg->variant->version == SJPEG_EXYNOS5433 && ctx->mode == S5P_JPEG_DECODE) jpeg_addr += ctx->out_q.sos; @@ -2257,7 +2260,7 @@ static void exynos3250_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; struct s5p_jpeg_fmt *fmt; - struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb; struct s5p_jpeg_addr jpeg_addr = {}; u32 pix_size; @@ -2271,7 +2274,7 @@ static void exynos3250_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) fmt = ctx->cap_q.fmt; } - jpeg_addr.y = vb2_dma_contig_plane_dma_addr(vb, 0); + jpeg_addr.y = vb2_dma_contig_plane_dma_addr(&vb->vb2_buf, 0); if (fmt->colplanes == 2) { jpeg_addr.cb = jpeg_addr.y + pix_size; @@ -2289,7 +2292,7 @@ static void exynos3250_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) static void exynos3250_jpeg_set_jpeg_addr(struct s5p_jpeg_ctx *ctx) { struct s5p_jpeg *jpeg = ctx->jpeg; - struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb; unsigned int jpeg_addr = 0; if (ctx->mode == S5P_JPEG_ENCODE) @@ -2297,7 +2300,7 @@ static void exynos3250_jpeg_set_jpeg_addr(struct s5p_jpeg_ctx *ctx) else vb = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); - jpeg_addr = vb2_dma_contig_plane_dma_addr(vb, 0); + jpeg_addr = vb2_dma_contig_plane_dma_addr(&vb->vb2_buf, 0); exynos3250_jpeg_jpgadr(jpeg->regs, jpeg_addr); } diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c index d6ab33e7060a..b9f4cdee555e 100644 --- a/drivers/media/platform/sh_veu.c +++ b/drivers/media/platform/sh_veu.c @@ -277,13 +277,13 @@ static void sh_veu_process(struct sh_veu_dev *veu, static void sh_veu_device_run(void *priv) { struct sh_veu_dev *veu = priv; - struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *src_buf, *dst_buf; src_buf = v4l2_m2m_next_src_buf(veu->m2m_ctx); dst_buf = v4l2_m2m_next_dst_buf(veu->m2m_ctx); if (src_buf && dst_buf) - sh_veu_process(veu, src_buf, dst_buf); + sh_veu_process(veu, &src_buf->vb2_buf, &dst_buf->vb2_buf); } /* ========== video ioctls ========== */ diff --git a/drivers/media/platform/vivid/vivid-kthread-cap.c b/drivers/media/platform/vivid/vivid-kthread-cap.c index 83cc6d3b4784..81ba454a6d95 100644 --- a/drivers/media/platform/vivid/vivid-kthread-cap.c +++ b/drivers/media/platform/vivid/vivid-kthread-cap.c @@ -863,8 +863,11 @@ int vivid_start_generating_vid_cap(struct vivid_dev *dev, bool *pstreaming) "%s-vid-cap", dev->v4l2_dev.name); if (IS_ERR(dev->kthread_vid_cap)) { + int err = PTR_ERR(dev->kthread_vid_cap); + + dev->kthread_vid_cap = NULL; v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); - return PTR_ERR(dev->kthread_vid_cap); + return err; } *pstreaming = true; vivid_grab_controls(dev, true); diff --git a/drivers/media/platform/vivid/vivid-kthread-out.c b/drivers/media/platform/vivid/vivid-kthread-out.c index c2c46dcdbe95..2c5dbdcb576a 100644 --- a/drivers/media/platform/vivid/vivid-kthread-out.c +++ b/drivers/media/platform/vivid/vivid-kthread-out.c @@ -248,8 +248,11 @@ int vivid_start_generating_vid_out(struct vivid_dev *dev, bool *pstreaming) "%s-vid-out", dev->v4l2_dev.name); if (IS_ERR(dev->kthread_vid_out)) { + int err = PTR_ERR(dev->kthread_vid_out); + + dev->kthread_vid_out = NULL; v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); - return PTR_ERR(dev->kthread_vid_out); + return err; } *pstreaming = true; vivid_grab_controls(dev, true); diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index ef5412311b2f..a84954f1be34 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -461,6 +461,8 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap)); break; } + vfree(dev->bitmap_cap); + dev->bitmap_cap = NULL; vivid_update_quality(dev); tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; diff --git a/drivers/media/platform/vivid/vivid-vid-common.c b/drivers/media/platform/vivid/vivid-vid-common.c index 1678b730dba2..2e82f520a869 100644 --- a/drivers/media/platform/vivid/vivid-vid-common.c +++ b/drivers/media/platform/vivid/vivid-vid-common.c @@ -33,7 +33,7 @@ const struct v4l2_dv_timings_cap vivid_dv_timings_cap = { .type = V4L2_DV_BT_656_1120, /* keep this initialization for compatibility with GCC < 4.4.6 */ .reserved = { 0 }, - V4L2_INIT_BT_TIMINGS(0, MAX_WIDTH, 0, MAX_HEIGHT, 14000000, 775000000, + V4L2_INIT_BT_TIMINGS(16, MAX_WIDTH, 16, MAX_HEIGHT, 14000000, 775000000, V4L2_DV_BT_STD_CEA861 | V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CVT | V4L2_DV_BT_STD_GTF, V4L2_DV_BT_CAP_PROGRESSIVE | V4L2_DV_BT_CAP_INTERLACED) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 6cfcdcea27e0..873948e429e8 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -930,6 +930,8 @@ int em28xx_start_analog_streaming(struct vb2_queue *vq, unsigned int count) em28xx_videodbg("%s\n", __func__); + dev->v4l2->field_count = 0; + /* Make sure streaming is not already in progress for this type of filehandle (e.g. video, vbi) */ rc = res_get(dev, vq->type); @@ -1149,8 +1151,6 @@ static void em28xx_ctrl_notify(struct v4l2_ctrl *ctrl, void *priv) { struct em28xx *dev = priv; - dev->v4l2->field_count = 0; - /* * In the case of non-AC97 volume controls, we still need * to do some setups at em28xx, in order to mute/unmute diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 618e4e2b4207..fea09a33c6c8 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1202,7 +1202,7 @@ static void uvc_ctrl_fill_event(struct uvc_video_chain *chain, __uvc_query_v4l2_ctrl(chain, ctrl, mapping, &v4l2_ctrl); - memset(ev->reserved, 0, sizeof(ev->reserved)); + memset(ev, 0, sizeof(*ev)); ev->type = V4L2_EVENT_CTRL; ev->id = v4l2_ctrl.id; ev->u.ctrl.value = value; diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 435b2ee7a0f4..2257a2b86f6c 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1026,11 +1026,19 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - /* Make sure the terminal type MSB is not null, otherwise it - * could be confused with a unit. + /* + * Reject invalid terminal types that would cause issues: + * + * - The high byte must be non-zero, otherwise it would be + * confused with a unit. + * + * - Bit 15 must be 0, as we use it internally as a terminal + * direction flag. + * + * Other unknown types are accepted. */ type = get_unaligned_le16(&buffer[4]); - if ((type & 0xff00) == 0) { + if ((type & 0x7f00) == 0 || (type & 0x8000) != 0) { uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " "interface %d INPUT_TERMINAL %d has invalid " "type 0x%04x, skipping\n", udev->devnum, diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index bf25ff7b2211..3bad54f3c777 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -638,6 +638,14 @@ void uvc_video_clock_update(struct uvc_streaming *stream, if (!uvc_hw_timestamps_param) return; + /* + * We will get called from __vb2_queue_cancel() if there are buffers + * done but not dequeued by the user, but the sample array has already + * been released at that time. Just bail out in that case. + */ + if (!clock->samples) + return; + spin_lock_irqsave(&clock->lock, flags); if (clock->count < clock->size) diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index c30f08e1ad70..8185f53fa79f 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1257,7 +1257,7 @@ static u32 user_flags(const struct v4l2_ctrl *ctrl) static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes) { - memset(ev->reserved, 0, sizeof(ev->reserved)); + memset(ev, 0, sizeof(*ev)); ev->type = V4L2_EVENT_CTRL; ev->id = ctrl->id; ev->u.ctrl.changes = changes; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f669cedca8bd..f74a74d91b9e 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -156,6 +156,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, { unsigned long first, last; int err, rw = 0; + unsigned int flags = FOLL_FORCE; dma->direction = direction; switch (dma->direction) { @@ -178,13 +179,15 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, if (NULL == dma->pages) return -ENOMEM; + if (rw == READ) + flags |= FOLL_WRITE; + dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); err = get_user_pages(current, current->mm, data & PAGE_MASK, dma->nr_pages, - rw == READ, 1, /* force */ - dma->pages, NULL); + flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index f42d718fa0ef..dac7b6f0a705 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1970,9 +1970,13 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) return -EINVAL; } } + + mutex_lock(&q->mmap_lock); + if (vb2_fileio_is_active(q)) { dprintk(1, "mmap: file io in progress\n"); - return -EBUSY; + ret = -EBUSY; + goto unlock; } /* @@ -1980,7 +1984,7 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; @@ -1993,11 +1997,13 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) if (length < (vma->vm_end - vma->vm_start)) { dprintk(1, "MMAP invalid, as it would overflow buffer length\n"); - return -EINVAL; + ret = -EINVAL; + goto unlock; } - mutex_lock(&q->mmap_lock); ret = call_memop(vb, mmap, vb->planes[plane].mem_priv, vma); + +unlock: mutex_unlock(&q->mmap_lock); if (ret) return ret; diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 3c3b517f1d1c..1cd322e939c7 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start, unsigned long first, last; unsigned long nr; struct frame_vector *vec; + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; first = start >> PAGE_SHIFT; last = (start + length - 1) >> PAGE_SHIFT; @@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 7812c8a63c9b..3370c4d65b25 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -144,7 +144,6 @@ static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) return; check_once = true; - WARN_ON(1); pr_warn("use of bytesused == 0 is deprecated and will be removed in the future,\n"); if (vb->vb2_queue->allow_zero_bytesused) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index a0547dbf9806..4d673a626db4 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -18,6 +18,7 @@ #include #include #include +#include #define DRIVER_NAME "memstick" @@ -436,6 +437,7 @@ static void memstick_check(struct work_struct *work) struct memstick_dev *card; dev_dbg(&host->dev, "memstick_check started\n"); + pm_runtime_get_noresume(host->dev.parent); mutex_lock(&host->lock); if (!host->card) { if (memstick_power_on(host)) @@ -479,6 +481,7 @@ out_power_off: host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF); mutex_unlock(&host->lock); + pm_runtime_put(host->dev.parent); dev_dbg(&host->dev, "memstick_check finished\n"); } diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index fefbe4cfa61d..1263cfd8b4d2 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -259,7 +259,7 @@ static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, mutex_unlock(&ab8500->lock); dev_vdbg(ab8500->dev, "rd: addr %#x => data %#x\n", addr, ret); - return ret; + return (ret < 0) ? ret : 0; } static int ab8500_get_register(struct device *dev, u8 bank, diff --git a/drivers/mfd/as3722.c b/drivers/mfd/as3722.c index 924ea90494ae..e1f597f97f86 100644 --- a/drivers/mfd/as3722.c +++ b/drivers/mfd/as3722.c @@ -405,6 +405,8 @@ static int as3722_i2c_probe(struct i2c_client *i2c, goto scrub; } + device_init_wakeup(as3722->dev, true); + dev_dbg(as3722->dev, "AS3722 core driver initialized successfully\n"); return 0; @@ -422,6 +424,29 @@ static int as3722_i2c_remove(struct i2c_client *i2c) return 0; } +static int __maybe_unused as3722_i2c_suspend(struct device *dev) +{ + struct as3722 *as3722 = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(as3722->chip_irq); + disable_irq(as3722->chip_irq); + + return 0; +} + +static int __maybe_unused as3722_i2c_resume(struct device *dev) +{ + struct as3722 *as3722 = dev_get_drvdata(dev); + + enable_irq(as3722->chip_irq); + + if (device_may_wakeup(dev)) + disable_irq_wake(as3722->chip_irq); + + return 0; +} + static const struct of_device_id as3722_of_match[] = { { .compatible = "ams,as3722", }, {}, @@ -434,10 +459,15 @@ static const struct i2c_device_id as3722_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, as3722_i2c_id); +static const struct dev_pm_ops as3722_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(as3722_i2c_suspend, as3722_i2c_resume) +}; + static struct i2c_driver as3722_i2c_driver = { .driver = { .name = "as3722", .of_match_table = as3722_of_match, + .pm = &as3722_pm_ops, }, .probe = as3722_i2c_probe, .remove = as3722_i2c_remove, diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 12099b09a9a7..e71b9f23379d 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2610,7 +2610,7 @@ static struct irq_chip prcmu_irq_chip = { .irq_unmask = prcmu_irq_unmask, }; -static __init char *fw_project_name(u32 project) +static char *fw_project_name(u32 project) { switch (project) { case PRCMU_FW_PROJECT_U8500: @@ -2758,7 +2758,7 @@ void __init db8500_prcmu_early_init(u32 phy_base, u32 size) INIT_WORK(&mb0_transfer.mask_work, prcmu_mask_work); } -static void __init init_prcm_registers(void) +static void init_prcm_registers(void) { u32 val; diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 3f9f4c874d2a..8d74806b83c1 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -274,7 +274,9 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, mc13xxx->adcflags |= MC13XXX_ADC_WORKING; - mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0); + ret = mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0); + if (ret) + goto out; adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2; adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC; diff --git a/drivers/mfd/qcom_rpm.c b/drivers/mfd/qcom_rpm.c index a867cc91657e..27486f278201 100644 --- a/drivers/mfd/qcom_rpm.c +++ b/drivers/mfd/qcom_rpm.c @@ -570,6 +570,10 @@ static int qcom_rpm_probe(struct platform_device *pdev) return -EFAULT; } + writel(fw_version[0], RPM_CTRL_REG(rpm, 0)); + writel(fw_version[1], RPM_CTRL_REG(rpm, 1)); + writel(fw_version[2], RPM_CTRL_REG(rpm, 2)); + dev_info(&pdev->dev, "RPM firmware %u.%u.%u\n", fw_version[0], fw_version[1], fw_version[2]); diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 4a0f076c91ba..faf8ce5be576 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -279,8 +279,9 @@ static int ti_tscadc_probe(struct platform_device *pdev) cell->pdata_size = sizeof(tscadc); } - err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells, - tscadc->used_cells, NULL, 0, NULL); + err = mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO, + tscadc->cells, tscadc->used_cells, NULL, + 0, NULL); if (err < 0) goto err_disable_clk; diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 5628a6b5b19b..c5c320efc7b4 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -594,6 +594,29 @@ static int tps6586x_i2c_remove(struct i2c_client *client) return 0; } +static int __maybe_unused tps6586x_i2c_suspend(struct device *dev) +{ + struct tps6586x *tps6586x = dev_get_drvdata(dev); + + if (tps6586x->client->irq) + disable_irq(tps6586x->client->irq); + + return 0; +} + +static int __maybe_unused tps6586x_i2c_resume(struct device *dev) +{ + struct tps6586x *tps6586x = dev_get_drvdata(dev); + + if (tps6586x->client->irq) + enable_irq(tps6586x->client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(tps6586x_pm_ops, tps6586x_i2c_suspend, + tps6586x_i2c_resume); + static const struct i2c_device_id tps6586x_id_table[] = { { "tps6586x", 0 }, { }, @@ -604,6 +627,7 @@ static struct i2c_driver tps6586x_driver = { .driver = { .name = "tps6586x", .of_match_table = of_match_ptr(tps6586x_of_match), + .pm = &tps6586x_pm_ops, }, .probe = tps6586x_i2c_probe, .remove = tps6586x_i2c_remove, diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 831696ee2472..90732a655d57 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -982,7 +982,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned irq_base, * letting it generate the right frequencies for USB, MADC, and * other purposes. */ -static inline int __init protect_pm_master(void) +static inline int protect_pm_master(void) { int e = 0; @@ -991,7 +991,7 @@ static inline int __init protect_pm_master(void) return e; } -static inline int __init unprotect_pm_master(void) +static inline int unprotect_pm_master(void) { int e = 0; diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 2bb2d0467a92..c47efe6dcb01 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -1622,6 +1622,7 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000ECD, 0x0000 }, /* R3789 - HPLPF4_2 */ { 0x00000EE0, 0x0000 }, /* R3808 - ASRC_ENABLE */ { 0x00000EE2, 0x0000 }, /* R3810 - ASRC_RATE1 */ + { 0x00000EE3, 0x4000 }, /* R3811 - ASRC_RATE2 */ { 0x00000EF0, 0x0000 }, /* R3824 - ISRC 1 CTRL 1 */ { 0x00000EF1, 0x0000 }, /* R3825 - ISRC 1 CTRL 2 */ { 0x00000EF2, 0x0000 }, /* R3826 - ISRC 1 CTRL 3 */ @@ -2877,6 +2878,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_ASRC_ENABLE: case ARIZONA_ASRC_STATUS: case ARIZONA_ASRC_RATE1: + case ARIZONA_ASRC_RATE2: case ARIZONA_ISRC_1_CTRL_1: case ARIZONA_ISRC_1_CTRL_2: case ARIZONA_ISRC_1_CTRL_3: diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 524660510599..0c15ba21fa54 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -217,7 +217,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, dma_addr_t *dma_handle) { - if (get_order(size) > MAX_ORDER) + if (get_order(size) >= MAX_ORDER) return NULL; return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index e486a0c26267..f6ed57d3125c 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -311,7 +311,7 @@ unmap: static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct mic_vdev *mvdev = to_micvdev(vdev); struct mic_device_ctrl __iomem *dc = mvdev->dc; diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 8bd63128d536..71c69e1c4ac0 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -1398,8 +1398,7 @@ retry: mm, (u64)addr, nr_pages, - !!(prot & SCIF_PROT_WRITE), - 0, + (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0, pinned_pages->pages, NULL); up_write(&mm->mmap_sem); diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index f74fc0ca2ef9..e6b723c6a2af 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -199,7 +199,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, *pageshift = PAGE_SHIFT; #endif if (get_user_pages - (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) + (current, current->mm, vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0) return -EFAULT; *paddr = page_to_phys(page); put_page(page); diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index cda32366cc7e..99230369f3ed 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -129,7 +129,7 @@ static void get_full_task_comm(struct task_entry *task_entry, struct mm_struct *mm = task->mm; /* fill the first TASK_COMM_LEN bytes with thread name */ - get_task_comm(task_entry->comm, task); + __get_task_comm(task_entry->comm, TASK_COMM_LEN, task); i = strlen(task_entry->comm); while (i < TASK_COMM_LEN) task_entry->comm[i++] = ' '; diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index c344483fa7d6..9f257c53e6d4 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -61,7 +61,7 @@ static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func, int tries; long timeout; - if (WARN_ON(index > func->num_templates)) + if (WARN_ON(index >= func->num_templates)) return -EINVAL; command = readl(syscfg->base + SYS_CFGCTRL); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index f82b1eb64c6b..e9578618f44a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1066,7 +1066,7 @@ static inline void mmc_set_ios(struct mmc_host *host) "width %u timing %u\n", mmc_hostname(host), ios->clock, ios->bus_mode, ios->power_mode, ios->chip_select, ios->vdd, - ios->bus_width, ios->timing); + 1 << ios->bus_width, ios->timing); host->ops->set_ios(host, ios); } @@ -1252,8 +1252,12 @@ int mmc_of_parse_voltage(struct device_node *np, u32 *mask) voltage_ranges = of_get_property(np, "voltage-ranges", &num_ranges); num_ranges = num_ranges / sizeof(*voltage_ranges) / 2; - if (!voltage_ranges || !num_ranges) { - pr_info("%s: voltage-ranges unspecified\n", np->full_name); + if (!voltage_ranges) { + pr_debug("%s: voltage-ranges unspecified\n", np->full_name); + return -EINVAL; + } + if (!num_ranges) { + pr_err("%s: voltage-ranges empty\n", np->full_name); return -EINVAL; } diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 164da9e6b0e8..217ae187b36a 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -221,7 +221,7 @@ static int mmc_clock_opt_set(void *data, u64 val) struct mmc_host *host = data; /* We need this check due to input value is u64 */ - if (val > host->f_max) + if (val != 0 && (val > host->f_max || val < host->f_min)) return -EINVAL; mmc_claim_host(host); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a814eb6882aa..e423b7f1505e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -514,7 +514,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->ext_csd.raw_bkops_status = ext_csd[EXT_CSD_BKOPS_STATUS]; if (!card->ext_csd.man_bkops_en) - pr_info("%s: MAN_BKOPS_EN bit is not set\n", + pr_debug("%s: MAN_BKOPS_EN bit is not set\n", mmc_hostname(card->host)); } @@ -1702,9 +1702,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (err) { pr_warn("%s: Enabling HPI failed\n", mmc_hostname(card->host)); + card->ext_csd.hpi_en = 0; err = 0; - } else + } else { card->ext_csd.hpi_en = 1; + } } /* diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c index d10538bb5e07..96f45caea109 100644 --- a/drivers/mmc/core/pwrseq_simple.c +++ b/drivers/mmc/core/pwrseq_simple.c @@ -29,15 +29,18 @@ struct mmc_pwrseq_simple { static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq, int value) { - int i; struct gpio_descs *reset_gpios = pwrseq->reset_gpios; - int values[reset_gpios->ndescs]; - for (i = 0; i < reset_gpios->ndescs; i++) - values[i] = value; + if (!IS_ERR(reset_gpios)) { + int i; + int values[reset_gpios->ndescs]; - gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc, - values); + for (i = 0; i < reset_gpios->ndescs; i++) + values[i] = value; + + gpiod_set_array_value_cansleep( + reset_gpios->ndescs, reset_gpios->desc, values); + } } static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host) @@ -79,7 +82,8 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host) struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq, struct mmc_pwrseq_simple, pwrseq); - gpiod_put_array(pwrseq->reset_gpios); + if (!IS_ERR(pwrseq->reset_gpios)) + gpiod_put_array(pwrseq->reset_gpios); if (!IS_ERR(pwrseq->ext_clk)) clk_put(pwrseq->ext_clk); @@ -112,7 +116,9 @@ struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host, } pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(pwrseq->reset_gpios)) { + if (IS_ERR(pwrseq->reset_gpios) && + PTR_ERR(pwrseq->reset_gpios) != -ENOENT && + PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) { ret = PTR_ERR(pwrseq->reset_gpios); goto clk_put; } diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index bf62e429f7fc..98be9eb3184b 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1840,13 +1840,14 @@ static void atmci_tasklet_func(unsigned long priv) } atmci_request_end(host, host->mrq); - state = STATE_IDLE; + goto unlock; /* atmci_request_end() sets host->state */ break; } } while (state != prev_state); host->state = state; +unlock: spin_unlock(&host->lock); } diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index ea2a2ebc6b91..dba7565571a5 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1147,7 +1147,7 @@ static inline void mmc_davinci_cpufreq_deregister(struct mmc_davinci_host *host) { } #endif -static void __init init_mmcsd_host(struct mmc_davinci_host *host) +static void init_mmcsd_host(struct mmc_davinci_host *host) { mmc_davinci_reset_ctrl(host, 1); diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index aad3243a48fc..e03ec74f3fb0 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1451,6 +1451,7 @@ static int mmc_spi_probe(struct spi_device *spi) if (status != 0) goto fail_add_host; } + mmc_detect_change(mmc, 0); dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", dev_name(&mmc->class_dev), diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index b9958a123594..20d422558fa3 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -105,6 +105,7 @@ struct mmc_omap_slot { unsigned int vdd; u16 saved_con; u16 bus_mode; + u16 power_mode; unsigned int fclk_freq; struct tasklet_struct cover_tasklet; @@ -920,7 +921,7 @@ static inline void set_cmd_timeout(struct mmc_omap_host *host, struct mmc_reques reg &= ~(1 << 5); OMAP_MMC_WRITE(host, SDIO, reg); /* Set maximum timeout */ - OMAP_MMC_WRITE(host, CTO, 0xff); + OMAP_MMC_WRITE(host, CTO, 0xfd); } static inline void set_data_timeout(struct mmc_omap_host *host, struct mmc_request *req) @@ -1156,7 +1157,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct mmc_omap_slot *slot = mmc_priv(mmc); struct mmc_omap_host *host = slot->host; int i, dsor; - int clk_enabled; + int clk_enabled, init_stream; mmc_omap_select_slot(slot, 0); @@ -1166,6 +1167,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) slot->vdd = ios->vdd; clk_enabled = 0; + init_stream = 0; switch (ios->power_mode) { case MMC_POWER_OFF: mmc_omap_set_power(slot, 0, ios->vdd); @@ -1173,13 +1175,17 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_UP: /* Cannot touch dsor yet, just power up MMC */ mmc_omap_set_power(slot, 1, ios->vdd); + slot->power_mode = ios->power_mode; goto exit; case MMC_POWER_ON: mmc_omap_fclk_enable(host, 1); clk_enabled = 1; dsor |= 1 << 11; + if (slot->power_mode != MMC_POWER_ON) + init_stream = 1; break; } + slot->power_mode = ios->power_mode; if (slot->bus_mode != ios->bus_mode) { if (slot->pdata->set_bus_mode != NULL) @@ -1195,7 +1201,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) for (i = 0; i < 2; i++) OMAP_MMC_WRITE(host, CON, dsor); slot->saved_con = dsor; - if (ios->power_mode == MMC_POWER_ON) { + if (init_stream) { /* worst case at 400kHz, 80 cycles makes 200 microsecs */ int usecs = 250; @@ -1233,6 +1239,7 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id) slot->host = host; slot->mmc = mmc; slot->id = id; + slot->power_mode = MMC_POWER_UNDEFINED; slot->pdata = &host->pdata->slots[id]; host->slots[id] = slot; diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 6b814d7d6560..af937d3e8c3e 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2117,7 +2117,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev) mmc->max_blk_size = 512; /* Block Length at max can be 1024 */ mmc->max_blk_count = 0xFFFF; /* No. of Blocks is 16 bits */ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; - mmc->max_seg_size = mmc->max_req_size; mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE; @@ -2174,6 +2173,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev) goto err_irq; } + /* + * Limit the maximum segment size to the lower of the request size + * and the DMA engine device segment size limits. In reality, with + * 32-bit transfers, the DMA engine can do longer segments than this + * but there is no way to represent that in the DMA model - if we + * increase this figure here, we get warnings from the DMA API debug. + */ + mmc->max_seg_size = min3(mmc->max_req_size, + dma_get_max_seg_size(host->rx_chan->device->dev), + dma_get_max_seg_size(host->tx_chan->device->dev)); + /* Request IRQ for MMC operations */ ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0, mmc_hostname(mmc), host); diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 72bbb12fb938..1d57c12b191c 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -181,7 +181,7 @@ static void pxamci_dma_irq(void *param); static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) { struct dma_async_tx_descriptor *tx; - enum dma_data_direction direction; + enum dma_transfer_direction direction; struct dma_slave_config config; struct dma_chan *chan; unsigned int nob = data->blocks; diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index ffd448149796..4a2ae06d0da4 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -217,7 +217,10 @@ static int sdhci_iproc_probe(struct platform_device *pdev) iproc_host->data = iproc_data; - mmc_of_parse(host->mmc); + ret = mmc_of_parse(host->mmc); + if (ret) + goto err; + sdhci_get_of_property(pdev); /* Enable EMMC 1/8V DDR capable */ diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index a10fde40b6c3..3c7c3a1c8f4f 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -716,7 +716,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) unsigned int sdio_status; if (!(pdata->flags & TMIO_MMC_SDIO_IRQ)) - return IRQ_HANDLED; + return IRQ_NONE; status = sd_ctrl_read16(host, CTL_SDIO_STATUS); ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask; @@ -730,7 +730,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ) mmc_signal_sdio_irq(mmc); - return IRQ_HANDLED; + return IRQ_RETVAL(ireg); } EXPORT_SYMBOL(tmio_mmc_sdio_irq); @@ -747,9 +747,7 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) if (__tmio_mmc_sdcard_irq(host, ireg, status)) return IRQ_HANDLED; - tmio_mmc_sdio_irq(irq, devid); - - return IRQ_HANDLED; + return tmio_mmc_sdio_irq(irq, devid); } EXPORT_SYMBOL(tmio_mmc_irq); diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 43fa16b5f510..672c02e32a39 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -168,9 +168,10 @@ int gpmi_init(struct gpmi_nand_data *this) /* * Reset BCH here, too. We got failures otherwise :( - * See later BCH reset for explanation of MX23 handling + * See later BCH reset for explanation of MX23 and MX28 handling */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; @@ -274,13 +275,11 @@ int bch_set_geometry(struct gpmi_nand_data *this) /* * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this - * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. - * On the other hand, the MX28 needs the reset, because one case has been - * seen where the BCH produced ECC errors constantly after 10000 - * consecutive reboots. The latter case has not been seen on the MX23 - * yet, still we don't know if it could happen there as well. + * chip, otherwise it will lock up. So we skip resetting BCH on the MX23 + * and MX28. */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 940e2ebbdea8..399c627b15cc 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2011,6 +2011,9 @@ void bond_3ad_unbind_slave(struct slave *slave) aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a32dcb6718ca..fde7f5efc47d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3067,8 +3067,12 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; if (event_dev->flags & IFF_MASTER) { + int ret; + netdev_dbg(event_dev, "IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + ret = bond_master_netdev_event(event, event_dev); + if (ret != NOTIFY_DONE) + return ret; } if (event_dev->flags & IFF_SLAVE) { diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 547098086773..f81df91a9ce1 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -52,5 +52,5 @@ config CAIF_VIRTIO The caif driver for CAIF over Virtio. if CAIF_VIRTIO -source "drivers/vhost/Kconfig" +source "drivers/vhost/Kconfig.vringh" endif diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 1dbee1cb3df9..8b7c6425b681 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -426,8 +426,6 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -435,20 +433,21 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (!skb) { - netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", - __func__, idx); - return NULL; + if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; + + *len_ptr = len; + priv->echo_skb[idx] = NULL; + + return skb; } - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - cf = (struct canfd_frame *)skb->data; - *len_ptr = cf->len; - priv->echo_skb[idx] = NULL; - - return skb; + return NULL; } /* diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 0527f485c3dc..973fcd442aea 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -98,8 +98,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) /* Reset the switch. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, GLOBAL_ATU_CONTROL_SWRESET | - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -124,13 +123,10 @@ static int mv88e6060_setup_global(struct dsa_switch *ds) */ REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536); - /* Enable automatic address learning, set the address - * database size to 1024 entries, and set the default aging - * time to 5 minutes. + /* Disable automatic address learning. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); return 0; } diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 2dea39b5cb0b..e2414f2d7ba9 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -712,7 +712,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct dsa_switch *ds, if (s->sizeof_stat == 8) _mv88e6xxx_stats_read(ds, s->reg + 1, &high); } - value = (((u64)high) << 16) | low; + value = (((u64)high) << 32) | low; return value; } diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index b9283901136e..0fdc9ad32a2e 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -156,8 +156,6 @@ static void dayna_block_output(struct net_device *dev, int count, #define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c)) #define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c)) -#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) - /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ static void slow_sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page); @@ -237,19 +235,26 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) static enum mac8390_access __init mac8390_testio(volatile unsigned long membase) { - unsigned long outdata = 0xA5A0B5B0; - unsigned long indata = 0x00000000; + u32 outdata = 0xA5A0B5B0; + u32 indata = 0; + /* Try writing 32 bits */ - memcpy_toio(membase, &outdata, 4); - /* Now compare them */ - if (memcmp_withio(&outdata, membase, 4) == 0) + nubus_writel(outdata, membase); + /* Now read it back */ + indata = nubus_readl(membase); + if (outdata == indata) return ACCESS_32; + + outdata = 0xC5C0D5D0; + indata = 0; + /* Write 16 bit output */ word_memcpy_tocard(membase, &outdata, 4); /* Now read it back */ word_memcpy_fromcard(&indata, membase, 4); if (outdata == indata) return ACCESS_16; + return ACCESS_UNKNOWN; } diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba3290..0ae723f75341 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff; if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index fe644823ceaf..bb51f124d8c7 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -716,8 +716,10 @@ static struct phy_device *connect_local_phy(struct net_device *dev) phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link, priv->phy_iface); - if (IS_ERR(phydev)) + if (IS_ERR(phydev)) { netdev_err(dev, "Could not attach to PHY\n"); + phydev = NULL; + } } else { int ret; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 2ff465848b65..097a0bf592ab 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1338,13 +1338,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct atl2_adapter *adapter; - static int cards_found; + static int cards_found = 0; unsigned long mmio_start; int mmio_len; int err; - cards_found = 0; - err = pci_enable_device(pdev); if (err) return err; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 7a6dd5e5e498..53b3c1a5851c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -126,6 +126,10 @@ static int bcm_sysport_set_rx_csum(struct net_device *dev, priv->rx_chk_en = !!(wanted & NETIF_F_RXCSUM); reg = rxchk_readl(priv, RXCHK_CONTROL); + /* Clear L2 header checks, which would prevent BPDUs + * from being received. + */ + reg &= ~RXCHK_L2_HDR_DIS; if (priv->rx_chk_en) reg |= RXCHK_EN; else @@ -400,7 +404,6 @@ static void bcm_sysport_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcm_sysport_priv *priv = netdev_priv(dev); - u32 reg; wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE; wol->wolopts = priv->wolopts; @@ -408,11 +411,7 @@ static void bcm_sysport_get_wol(struct net_device *dev, if (!(priv->wolopts & WAKE_MAGICSECURE)) return; - /* Return the programmed SecureOn password */ - reg = umac_readl(priv, UMAC_PSW_MS); - put_unaligned_be16(reg, &wol->sopass[0]); - reg = umac_readl(priv, UMAC_PSW_LS); - put_unaligned_be32(reg, &wol->sopass[2]); + memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } static int bcm_sysport_set_wol(struct net_device *dev, @@ -428,13 +427,8 @@ static int bcm_sysport_set_wol(struct net_device *dev, if (wol->wolopts & ~supported) return -EINVAL; - /* Program the SecureOn password */ - if (wol->wolopts & WAKE_MAGICSECURE) { - umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), - UMAC_PSW_MS); - umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), - UMAC_PSW_LS); - } + if (wol->wolopts & WAKE_MAGICSECURE) + memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass)); /* Flag the device and relevant IRQ as wakeup capable */ if (wol->wolopts) { @@ -1889,12 +1883,17 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) unsigned int timeout = 1000; u32 reg; - /* Password has already been programmed */ reg = umac_readl(priv, UMAC_MPD_CTRL); reg |= MPD_EN; reg &= ~PSW_EN; - if (priv->wolopts & WAKE_MAGICSECURE) + if (priv->wolopts & WAKE_MAGICSECURE) { + /* Program the SecureOn password */ + umac_writel(priv, get_unaligned_be16(&priv->sopass[0]), + UMAC_PSW_MS); + umac_writel(priv, get_unaligned_be32(&priv->sopass[2]), + UMAC_PSW_LS); reg |= PSW_EN; + } umac_writel(priv, reg, UMAC_MPD_CTRL); /* Make sure RBUF entered WoL mode as result */ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 8ace6ecb5f79..e668b1ce5828 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -11,6 +11,7 @@ #ifndef __BCM_SYSPORT_H #define __BCM_SYSPORT_H +#include #include /* Receive/transmit descriptor format */ @@ -682,6 +683,7 @@ struct bcm_sysport_priv { unsigned int crc_fwd:1; u16 rev; u32 wolopts; + u8 sopass[SOPASS_MAX]; unsigned int wol_irq_disabled:1; /* MIB related fields */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fea8116da06a..00bd7be85679 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -330,6 +330,12 @@ normal_tx: } length >>= 9; + if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) { + dev_warn_ratelimited(&pdev->dev, "Dropped oversize %d bytes TX packet.\n", + skb->len); + i = 0; + goto tx_dma_error; + } flags |= bnxt_lhint_arr[length]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 16baaafed26c..cbdeb54eab51 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1090,6 +1090,9 @@ static void nic_remove(struct pci_dev *pdev) { struct nicpf *nic = pci_get_drvdata(pdev); + if (!nic) + return; + if (nic->flags & NIC_SRIOV_ENABLED) pci_disable_sriov(pdev); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 0433fdebda25..3c20d0dc9256 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -120,7 +120,7 @@ static void enic_init_affinity_hint(struct enic *enic) for (i = 0; i < enic->intr_count; i++) { if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) || - (enic->msix[i].affinity_mask && + (cpumask_available(enic->msix[i].affinity_mask) && !cpumask_empty(enic->msix[i].affinity_mask))) continue; if (zalloc_cpumask_var(&enic->msix[i].affinity_mask, @@ -149,7 +149,7 @@ static void enic_set_affinity_hint(struct enic *enic) for (i = 0; i < enic->intr_count; i++) { if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) || - !enic->msix[i].affinity_mask || + !cpumask_available(enic->msix[i].affinity_mask) || cpumask_empty(enic->msix[i].affinity_mask)) continue; err = irq_set_affinity_hint(enic->msix_entry[i].vector, @@ -162,7 +162,7 @@ static void enic_set_affinity_hint(struct enic *enic) for (i = 0; i < enic->wq_count; i++) { int wq_intr = enic_msix_wq_intr(enic, i); - if (enic->msix[wq_intr].affinity_mask && + if (cpumask_available(enic->msix[wq_intr].affinity_mask) && !cpumask_empty(enic->msix[wq_intr].affinity_mask)) netif_set_xps_queue(enic->netdev, enic->msix[wq_intr].affinity_mask, @@ -1180,7 +1180,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, * CHECSUM_UNNECESSARY. */ if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && - ipv4_csum_ok) + (ipv4_csum_ok || ipv6)) skb->ip_summed = CHECKSUM_UNNECESSARY; if (vlan_stripped) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 650f7888e32b..55ac00055977 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1888,6 +1888,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) u16 i, j; u8 __iomem *bd; + netdev_reset_queue(ugeth->ndev); + ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 253f8ed0537a..60c727b0b7ab 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -919,10 +919,8 @@ static int hip04_mac_probe(struct platform_device *pdev) } ret = register_netdev(ndev); - if (ret) { - free_netdev(ndev); + if (ret) goto alloc_fail; - } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 1a16c0307b47..bd36fbe81ad2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -188,12 +188,10 @@ static void hns_ae_put_handle(struct hnae_handle *handle) struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); int i; - vf_cb->mac_cb = NULL; - - kfree(vf_cb); - for (i = 0; i < handle->q_num; i++) hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0; + + kfree(vf_cb); } static void hns_ae_ring_enable_all(struct hnae_handle *handle, int val) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 37491c85bc42..6ff13c559e52 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -319,7 +319,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) } hns_mdio_cmd_write(mdio_dev, is_c45, - MDIO_C45_WRITE_ADDR, phy_id, devad); + MDIO_C45_READ, phy_id, devad); } /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/ diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 2f9b12cf9ee5..70b3253e7ed5 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1163,11 +1163,15 @@ out: map_failed_frags: last = i+1; - for (i = 0; i < last; i++) + for (i = 1; i < last; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); map_failed: if (!firmware_has_feature(FW_FEATURE_CMO)) netdev_err(netdev, "tx: unable to map xmit buffer\n"); @@ -1234,7 +1238,6 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) struct iphdr *iph; u16 mss = 0; -restart_poll: while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1332,7 +1335,6 @@ restart_poll: napi_reschedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - goto restart_poll; } } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6369d88b81c1..6b1cacd86c6e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2131,7 +2131,7 @@ static int e1000_request_msix(struct e1000_adapter *adapter) if (strlen(netdev->name) < (IFNAMSIZ - 5)) snprintf(adapter->rx_ring->name, sizeof(adapter->rx_ring->name) - 1, - "%s-rx-0", netdev->name); + "%.14s-rx-0", netdev->name); else memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ); err = request_irq(adapter->msix_entries[vector].vector, @@ -2147,7 +2147,7 @@ static int e1000_request_msix(struct e1000_adapter *adapter) if (strlen(netdev->name) < (IFNAMSIZ - 5)) snprintf(adapter->tx_ring->name, sizeof(adapter->tx_ring->name) - 1, - "%s-tx-0", netdev->name); + "%.14s-tx-0", netdev->name); else memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ); err = request_irq(adapter->msix_entries[vector].vector, diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 25a0ad5102d6..855cf8c15c8a 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -111,10 +111,14 @@ static int e1000e_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, ptp_clock_info); unsigned long flags; - u64 ns; + u64 cycles, ns; spin_lock_irqsave(&adapter->systim_lock, flags); - ns = timecounter_read(&adapter->tc); + + /* Use timecounter_cyc2time() to allow non-monotonic SYSTIM readings */ + cycles = adapter->cc.read(&adapter->cc); + ns = timecounter_cyc2time(&adapter->tc, cycles); + spin_unlock_irqrestore(&adapter->systim_lock, flags); *ts = ns_to_timespec64(ns); @@ -170,9 +174,12 @@ static void e1000e_systim_overflow_work(struct work_struct *work) systim_overflow_work.work); struct e1000_hw *hw = &adapter->hw; struct timespec64 ts; + u64 ns; - adapter->ptp_clock_info.gettime64(&adapter->ptp_clock_info, &ts); + /* Update the timecounter */ + ns = timecounter_read(&adapter->tc); + ts = ns_to_timespec64(ns); e_dbg("SYSTIM overflow check at %lld.%09lu\n", (long long) ts.tv_sec, ts.tv_nsec); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 02b23f6277fb..c1796aa2dde5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7339,9 +7339,11 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, rtnl_unlock(); #ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; + if (!runtime) { + retval = pci_save_state(pdev); + if (retval) + return retval; + } #endif status = rd32(E1000_STATUS); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index ffd2e74e5638..dcd718ce13d5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1429,7 +1429,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *autoneg = false; if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; return 0; } diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4182290fdbcf..82f080a5ed5c 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2884,7 +2884,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_probe(pdev); if (ret) - return ret; + goto err_put_clk; pd = dev_get_platdata(&pdev->dev); msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ? @@ -2892,6 +2892,11 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) infer_hw_params(msp); return 0; + +err_put_clk: + if (!IS_ERR(msp->clk)) + clk_disable_unprepare(msp->clk); + return ret; } static int mv643xx_eth_shared_remove(struct platform_device *pdev) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 7173836fe361..c9f4b5412844 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs, memset(p, 0, regs->len); memcpy_fromio(p, io, B3_RAM_ADDR); - memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, - regs->len - B3_RI_WTO_R1); + if (regs->len > B3_RI_WTO_R1) { + memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, + regs->len - B3_RI_WTO_R1); + } } /* Wake on Lan only supported on Yukon chips with rev 1 or above */ diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 6e5065f0907b..5cc05df69a86 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -93,7 +94,7 @@ static int copybreak __read_mostly = 128; module_param(copybreak, int, 0); MODULE_PARM_DESC(copybreak, "Receive copy threshold"); -static int disable_msi = 0; +static int disable_msi = -1; module_param(disable_msi, int, 0); MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); @@ -4923,6 +4924,24 @@ static const char *sky2_name(u8 chipid, char *buf, int sz) return buf; } +static const struct dmi_system_id msi_blacklist[] = { + { + .ident = "Dell Inspiron 1545", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1545"), + }, + }, + { + .ident = "Gateway P-79", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Gateway"), + DMI_MATCH(DMI_PRODUCT_NAME, "P-79"), + }, + }, + {} +}; + static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev, *dev1; @@ -5034,6 +5053,9 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_pci; } + if (disable_msi == -1) + disable_msi = !!dmi_check_system(msi_blacklist); + if (!disable_msi && pci_enable_msi(pdev) == 0) { err = sky2_test_msi(hw); if (err) { @@ -5079,7 +5101,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); - pdev->d3_delay = 200; + pdev->d3_delay = 300; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index fc222df47aa9..9e104dcfa9dd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2636,6 +2636,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) down(&priv->cmd.event_sem); kfree(priv->cmd.context); + priv->cmd.context = NULL; up(&priv->cmd.poll_sem); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 82bf1b539d87..ac7c64bae2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -725,13 +725,27 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, return 0; } #endif + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; - void *hdr = (u8 *)va + sizeof(struct ethhdr); + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -851,6 +865,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e83fde..033f99d2f15c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1906,9 +1906,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u64 qword_field; u32 dword_field; - int err; + u16 word_field; u8 byte_field; + int err; static const u8 a0_dmfs_query_hw_steering[] = { [0] = MLX4_STEERING_DMFS_A0_DEFAULT, [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, @@ -1936,19 +1938,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* QPC/EEC/CQC/EQC/RDMARC attributes */ - MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); - MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); - MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); - MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); - MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); - MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); - MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); - MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); - MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); - MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); - MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); - MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); - MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); + param->qpc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); + param->log_num_qps = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); + param->srqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); + param->log_num_srqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); + param->cqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); + param->log_num_cqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); + param->altc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); + param->auxc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); + param->eqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); + param->log_num_eqs = byte_field & 0x1f; + MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); + param->num_sys_eqs = word_field & 0xfff; + MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); + param->rdmarc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); + param->log_rd_per_qp = byte_field & 0x7; MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { @@ -1967,22 +1982,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); - MLX4_GET(byte_field, outbox, - INIT_HCA_FS_A0_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); param->dmfs_high_steer_mode = a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + param->log_mc_hash_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; } /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ @@ -2006,15 +2020,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); - MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); - MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enabled = byte_field >> 7; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + param->log_mpt_sz = byte_field & 0x3f; MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); - MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + param->log_uar_sz = byte_field & 0xf; /* phv_check enable */ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 7911dc3da98e..37dfdb1329f4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2652,13 +2652,13 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; + int tot; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; - total_pages = - roundup_pow_of_two((total_mem + (page_offset << 6)) >> - page_shift); + tot = (total_mem + (page_offset << 6)) >> page_shift; + total_pages = !tot ? 1 : roundup_pow_of_two(tot); return total_pages; } diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index deae10d7426d..9b588251f2a7 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -578,6 +578,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) struct cp_private *cp; int handled = 0; u16 status; + u16 mask; if (unlikely(dev == NULL)) return IRQ_NONE; @@ -585,6 +586,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) spin_lock(&cp->lock); + mask = cpr16(IntrMask); + if (!mask) + goto out_unlock; + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) goto out_unlock; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 22e4845afd64..4eff14320831 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -325,6 +325,8 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x0000), 0, 0, RTL_CFG_1 }, + { PCI_VDEVICE(REALTEK, 0x2502), RTL_CFG_1 }, + { PCI_VDEVICE(REALTEK, 0x2600), RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f735dfcb64ae..29d31eb995d7 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -453,7 +453,7 @@ static int ravb_dmac_init(struct net_device *ndev) ravb_write(ndev, RCR_EFFS | RCR_ENCF | RCR_ETS0 | 0x18000000, RCR); /* Set FIFO size */ - ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00222200, TGC); + ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00112200, TGC); /* Timestamp enable */ ravb_write(ndev, TCCR_TFEN, TCCR); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 3920c3eb6006..df6063faad2e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -821,37 +821,49 @@ static int rocker_tlv_put(struct rocker_desc_info *desc_info, static int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, int attrtype, u8 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); + u8 tmp = value; /* work around GCC PR81715 */ + + return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &tmp); } static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, int attrtype, u16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &tmp); } static int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, int attrtype, __be16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &tmp); } static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &tmp); } static int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, int attrtype, __be32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &tmp); } static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); + u64 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &tmp); } static struct rocker_tlv * diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index ea94f5272c3f..e3befecc221d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1520,8 +1520,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) } ret = phy_power_on(bsp_priv, true); - if (ret) + if (ret) { + gmac_clk_enable(bsp_priv, false); return ret; + } pm_runtime_enable(dev); pm_runtime_get_sync(dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 0db15420902e..d754cd9dde33 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -614,25 +614,27 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct ethtool_eee *edata) { struct stmmac_priv *priv = netdev_priv(dev); + int ret; - priv->eee_enabled = edata->eee_enabled; - - if (!priv->eee_enabled) + if (!edata->eee_enabled) { stmmac_disable_eee_mode(priv); - else { + } else { /* We are asking for enabling the EEE but it is safe * to verify all by invoking the eee_init function. * In case of failure it will return an error. */ - priv->eee_enabled = stmmac_eee_init(priv); - if (!priv->eee_enabled) + edata->eee_enabled = stmmac_eee_init(priv); + if (!edata->eee_enabled) return -EOPNOTSUPP; - - /* Do not change tx_lpi_timer in case of failure */ - priv->tx_lpi_timer = edata->tx_lpi_timer; } - return phy_ethtool_set_eee(priv->phydev, edata); + ret = phy_ethtool_set_eee(dev->phydev, edata); + if (ret) + return ret; + + priv->eee_enabled = edata->eee_enabled; + priv->tx_lpi_timer = edata->tx_lpi_timer; + return 0; } static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index da97e20c0e18..ec453e07b316 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2532,6 +2532,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -2733,7 +2747,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ccebf89aa1e4..85f3a2c0d4dd 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8121,6 +8121,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3; prop_len = niu_pci_eeprom_read(np, start + 4); + if (prop_len < 0) + return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8165,8 +8167,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) - *prop_buf++ = niu_pci_eeprom_read(np, off + i); + for (i = 0; i < prop_len; i++) { + err = niu_pci_eeprom_read(np, off + i); + if (err >= 0) + *prop_buf = err; + ++prop_buf; + } } start += len; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a9268db4e349..ae02ce17c505 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -389,7 +389,12 @@ static int ipvlan_nl_changelink(struct net_device *dev, struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); - if (data && data[IFLA_IPVLAN_MODE]) { + if (!data) + return 0; + if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); ipvlan_set_port_mode(port, nmode); @@ -454,6 +459,8 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct ipvl_dev *tmp = netdev_priv(phy_dev); phy_dev = tmp->phy_dev; + if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; } else if (!netif_is_ipvlan_port(phy_dev)) { err = ipvlan_port_create(phy_dev); if (err < 0) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index dc934347ae28..e6f564d50663 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -890,14 +890,14 @@ static void decode_txts(struct dp83640_private *dp83640, struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns; /* We must already have the skb that triggered this. */ - +again: skb = skb_dequeue(&dp83640->tx_queue); - if (!skb) { pr_debug("have timestamp but tx_queue empty\n"); return; @@ -912,6 +912,11 @@ static void decode_txts(struct dp83640_private *dp83640, } return; } + skb_info = (struct dp83640_skb_info *)skb->cb; + if (time_after(jiffies, skb_info->tmo)) { + kfree_skb(skb); + goto again; + } ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@ -1461,6 +1466,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, static void dp83640_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { + struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb; struct dp83640_private *dp83640 = phydev->priv; switch (dp83640->hwts_tx_en) { @@ -1473,6 +1479,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, /* fall through */ case HWTSTAMP_TX_ON: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->tx_queue, skb); break; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index cb40734d61e3..d079ac58c7da 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -267,7 +267,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); - put_device(&bus->dev); return -EINVAL; } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 53af8e59b1a5..49809eb643e2 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -28,6 +28,7 @@ #include #include #include +#include /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -288,6 +289,17 @@ static int kszphy_config_init(struct phy_device *phydev) return 0; } +static int ksz8061_config_init(struct phy_device *phydev) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); + if (ret) + return ret; + + return kszphy_config_init(phydev); +} + static int ksz9021_load_values_from_of(struct phy_device *phydev, const struct device_node *of_node, u16 reg, @@ -773,7 +785,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = 0x00fffff0, .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = ksz8061_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1f2f25a71d18..70f26b30729c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1265,20 +1265,17 @@ static int gen10g_resume(struct phy_device *phydev) static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); - switch (max_speed) { - default: - return -ENOTSUPP; - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + case SPEED_10: + phydev->supported &= ~PHY_100BT_FEATURES; /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - /* fall through */ - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + phydev->supported &= ~PHY_1000BT_FEATURES; + break; + case SPEED_1000: + break; + default: + return -ENOTSUPP; } return 0; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 583d50f80b24..02327e6c4819 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -442,6 +442,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 12a627fcc02c..53c1f2bd0f24 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -577,6 +577,7 @@ static void pptp_sock_destruct(struct sock *sk) pppox_unbind_sock(sk); } skb_queue_purge(&sk->sk_receive_queue); + dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); } static int pptp_create(struct net *net, struct socket *sock, int kern) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 33ffb573fd67..267a90423154 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -247,17 +247,6 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } -static bool __team_option_inst_tmp_find(const struct list_head *opts, - const struct team_option_inst *needle) -{ - struct team_option_inst *opt_inst; - - list_for_each_entry(opt_inst, opts, tmp_list) - if (opt_inst == needle) - return true; - return false; -} - static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2447,7 +2436,6 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) int err = 0; int i; struct nlattr *nl_option; - LIST_HEAD(opt_inst_list); team = team_nl_team_get(info); if (!team) @@ -2463,6 +2451,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; struct nlattr *attr; struct nlattr *attr_data; + LIST_HEAD(opt_inst_list); enum team_option_type opt_type; int opt_port_ifindex = 0; /* != 0 for per-port options */ u32 opt_array_index = 0; @@ -2566,23 +2555,17 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; - - /* dumb/evil user-space can send us duplicate opt, - * keep only the last one - */ - if (__team_option_inst_tmp_find(&opt_inst_list, - opt_inst)) - continue; - list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { err = -ENOENT; goto team_put; } - } - err = team_nl_send_event_options_get(team, &opt_inst_list); + err = team_nl_send_event_options_get(team, &opt_inst_list); + if (err) + break; + } team_put: team_nl_team_put(team); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index a1536d0d83a9..a00335b3786e 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -305,6 +305,20 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) return 0; } +static void lb_bpf_func_free(struct team *team) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct bpf_prog *fp; + + if (!lb_priv->ex->orig_fprog) + return; + + __fprog_destroy(lb_priv->ex->orig_fprog); + fp = rcu_dereference_protected(lb_priv->fp, + lockdep_is_held(&team->lock)); + bpf_prog_destroy(fp); +} + static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); @@ -619,6 +633,7 @@ static void lb_exit(struct team *team) team_options_unregister(team, lb_options, ARRAY_SIZE(lb_options)); + lb_bpf_func_free(team); cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); free_percpu(lb_priv->pcpu_stats); kfree(lb_priv->ex); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1d6ffe28b337..6032ec80956c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1472,9 +1472,9 @@ static void tun_setup(struct net_device *dev) */ static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) { - if (!data) - return 0; - return -EINVAL; + /* NL_SET_ERR_MSG(extack, + "tun/tap creation via rtnetlink is not supported."); */ + return -EOPNOTSUPP; } static struct rtnl_link_ops tun_link_ops __read_mostly = { diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c index 5e151e6a3e09..3c7715ea40c1 100644 --- a/drivers/net/usb/ch9200.c +++ b/drivers/net/usb/ch9200.c @@ -255,14 +255,9 @@ static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb, tx_overhead = 0x40; len = skb->len; - if (skb_headroom(skb) < tx_overhead) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, tx_overhead, 0, flags); + if (skb_cow_head(skb, tx_overhead)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, tx_overhead); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 111d907e0c11..79cede19e0c4 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2825,6 +2825,12 @@ static int hso_get_config_data(struct usb_interface *interface) return -EIO; } + /* check if we have a valid interface */ + if (if_num > 16) { + kfree(config_data); + return -EINVAL; + } + switch (config_data[if_num]) { case 0x0: result = 0; @@ -2895,10 +2901,18 @@ static int hso_probe(struct usb_interface *interface, /* Get the interface/port specification from either driver_info or from * the device itself */ - if (id->driver_info) + if (id->driver_info) { + /* if_num is controlled by the device, driver_info is a 0 terminated + * array. Make sure, the access is in bounds! */ + for (i = 0; i <= if_num; ++i) + if (((u32 *)(id->driver_info))[i] == 0) + goto exit; port_spec = ((u32 *)(id->driver_info))[if_num]; - else + } else { port_spec = hso_get_config_data(interface); + if (port_spec < 0) + goto exit; + } /* Check if we need to switch to alt interfaces prior to port * configuration */ diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index cd93220c9b45..a628db738b8a 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -812,18 +812,12 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, } /* We now decide whether we can put our special header into the sk_buff */ - if (skb_cloned(skb) || skb_headroom(skb) < 2) { - /* no such luck - we make our own */ - struct sk_buff *copied_skb; - copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC); - dev_kfree_skb_irq(skb); - skb = copied_skb; - if (!copied_skb) { - kaweth->stats.tx_errors++; - netif_start_queue(net); - spin_unlock_irq(&kaweth->device_lock); - return NETDEV_TX_OK; - } + if (skb_cow_head(skb, 2)) { + kaweth->stats.tx_errors++; + netif_start_queue(net); + spin_unlock_irq(&kaweth->device_lock); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } private_header = (__le16 *)__skb_push(skb, 2); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3b67140eed73..ee6fefe92af4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -717,6 +717,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ + {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 7cee7777d13f..b6b8aec73b28 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1838,13 +1838,13 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev, /* We do not advertise SG, so skbs should be already linearized */ BUG_ON(skb_shinfo(skb)->nr_frags); - if (skb_headroom(skb) < overhead) { - struct sk_buff *skb2 = skb_copy_expand(skb, - overhead, 0, flags); + /* Make writable and expand header space by overhead if required */ + if (skb_cow_head(skb, overhead)) { + /* Must deallocate here as returning NULL to indicate error + * means the skb won't be deallocated in the caller. + */ dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (csum) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c41378214ede..835129152fc4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1229,6 +1229,14 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, } } + rcu_read_lock(); + + if (unlikely(!(vxlan->dev->flags & IFF_UP))) { + rcu_read_unlock(); + atomic_long_inc(&vxlan->dev->rx_dropped); + goto drop; + } + stats = this_cpu_ptr(vxlan->dev->tstats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; @@ -1237,6 +1245,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, gro_cells_receive(&vxlan->gro_cells, skb); + rcu_read_unlock(); + return; drop: if (tun_dst) @@ -1881,7 +1891,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; - struct net_device *dev = skb->dev; + struct net_device *dev; int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); @@ -1901,8 +1911,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, #endif } + rcu_read_lock(); + dev = skb->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto drop; + } + if (dst_vxlan->flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source); + vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -1915,8 +1932,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { +drop: dev->stats.rx_dropped++; } + rcu_read_unlock(); } static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, @@ -2303,6 +2322,8 @@ static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + gro_cells_destroy(&vxlan->gro_cells); + vxlan_fdb_delete_default(vxlan); free_percpu(dev->tstats); @@ -3047,7 +3068,6 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); - gro_cells_destroy(&vxlan->gro_cells); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); } @@ -3256,10 +3276,8 @@ static void __net_exit vxlan_exit_net(struct net *net) /* If vxlan->dev is in the same netns, it has already been added * to the list by the previous loop. */ - if (!net_eq(dev_net(vxlan->dev), net)) { - gro_cells_destroy(&vxlan->gro_cells); + if (!net_eq(dev_net(vxlan->dev), net)) unregister_netdevice_queue(vxlan->dev, &list); - } } unregister_netdevice_many(&list); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index f201e50447d8..b867875aa6e6 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -4065,7 +4065,7 @@ static void ath10k_tpc_config_disp_tables(struct ath10k *ar, rate_code[i], type); snprintf(buff, sizeof(buff), "%8d ", tpc[j]); - strncat(tpc_value, buff, strlen(buff)); + strlcat(tpc_value, buff, sizeof(tpc_value)); } tpc_stats->tpc_table[type].pream_idx[i] = pream_idx; tpc_stats->tpc_table[type].rate_code[i] = rate_code[i]; diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 6ed26baca0e5..7af8479acb98 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1035,8 +1035,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie) }; int rc; u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; - struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); + struct wmi_set_appie_cmd *cmd; + if (len < ie_len) { + rc = -EINVAL; + goto out; + } + + cmd = kzalloc(len, GFP_KERNEL); if (!cmd) { rc = -ENOMEM; goto out; diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index ec2b9c577b90..3644c9edaf81 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -616,7 +616,7 @@ struct b43_c32 b43_cordic(int theta) u8 i; s32 tmp; s8 signx = 1; - u32 angle = 0; + s32 angle = 0; struct b43_c32 ret = { .i = 39797, .q = 0, }; while (theta > (180 << 16)) diff --git a/drivers/net/wireless/cw1200/scan.c b/drivers/net/wireless/cw1200/scan.c index bff81b8d4164..9f1037e7e55c 100644 --- a/drivers/net/wireless/cw1200/scan.c +++ b/drivers/net/wireless/cw1200/scan.c @@ -78,6 +78,10 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, if (req->n_ssids > WSM_SCAN_MAX_NUM_OF_SSIDS) return -EINVAL; + /* will be unlocked in cw1200_scan_work() */ + down(&priv->scan.lock); + mutex_lock(&priv->conf_mutex); + frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0, req->ie_len); if (!frame.skb) @@ -86,19 +90,15 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, if (req->ie_len) memcpy(skb_put(frame.skb, req->ie_len), req->ie, req->ie_len); - /* will be unlocked in cw1200_scan_work() */ - down(&priv->scan.lock); - mutex_lock(&priv->conf_mutex); - ret = wsm_set_template_frame(priv, &frame); if (!ret) { /* Host want to be the probe responder. */ ret = wsm_set_probe_responder(priv, true); } if (ret) { + dev_kfree_skb(frame.skb); mutex_unlock(&priv->conf_mutex); up(&priv->scan.lock); - dev_kfree_skb(frame.skb); return ret; } @@ -120,10 +120,9 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, ++priv->scan.n_ssids; } - mutex_unlock(&priv->conf_mutex); - if (frame.skb) dev_kfree_skb(frame.skb); + mutex_unlock(&priv->conf_mutex); queue_work(priv->workqueue, &priv->scan.work); return 0; } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index ab480ea6d95a..0f582117b0e3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3002,7 +3002,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } @@ -3195,16 +3195,16 @@ static int __init init_mac80211_hwsim(void) if (err) return err; + err = hwsim_init_netlink(); + if (err) + goto out_unregister_driver; + hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim"); if (IS_ERR(hwsim_class)) { err = PTR_ERR(hwsim_class); - goto out_unregister_driver; + goto out_exit_netlink; } - err = hwsim_init_netlink(); - if (err < 0) - goto out_unregister_driver; - for (i = 0; i < radios; i++) { struct hwsim_new_radio_params param = { 0 }; @@ -3310,6 +3310,8 @@ out_free_mon: free_netdev(hwsim_mon); out_free_radios: mac80211_hwsim_free(); +out_exit_netlink: + hwsim_exit_netlink(); out_unregister_driver: platform_driver_unregister(&mac80211_hwsim_driver); return err; diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.h b/drivers/net/wireless/mediatek/mt7601u/eeprom.h index 662d12703b69..57b503ae63f1 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.h +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.h @@ -17,7 +17,7 @@ struct mt7601u_dev; -#define MT7601U_EE_MAX_VER 0x0c +#define MT7601U_EE_MAX_VER 0x0d #define MT7601U_EEPROM_SIZE 256 #define MT7601U_DEFAULT_TX_POWER 6 diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index d3fbe33d2324..a13f08fd8690 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -75,7 +75,6 @@ static inline int rsi_kill_thread(struct rsi_thread *handle) atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - wait_for_completion(&handle->completion); return kthread_stop(handle->task); } diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 7b27c7e23af2..cc10b72607c6 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -1123,8 +1123,11 @@ static int wl12xx_chip_wakeup(struct wl1271 *wl, bool plt) goto out; ret = wl12xx_fetch_firmware(wl, plt); - if (ret < 0) - goto out; + if (ret < 0) { + kfree(wl->fw_status); + kfree(wl->raw_fw_status); + kfree(wl->tx_res_if); + } out: return ret; diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 90ecb7e8834f..7b7d4270ffc7 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -363,7 +363,6 @@ static struct wiphy *virt_wifi_make_wiphy(void) wiphy->bands[IEEE80211_BAND_5GHZ] = &band_5ghz; wiphy->bands[IEEE80211_BAND_60GHZ] = NULL; - wiphy->regulatory_flags = REGULATORY_WIPHY_SELF_MANAGED; wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); priv = wiphy_priv(wiphy); @@ -527,8 +526,10 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); - if (!dev->ieee80211_ptr) + if (!dev->ieee80211_ptr) { + err = -ENOMEM; goto remove_handler; + } dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; dev->ieee80211_ptr->wiphy = common_wiphy; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 02db20b26749..d324ac308e6d 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1538,11 +1538,6 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s skb_frag_size_set(&frags[i], len); } - /* Copied all the bits from the frag list -- free it. */ - skb_frag_list_init(skb); - xenvif_skb_zerocopy_prepare(queue, nskb); - kfree_skb(nskb); - /* Release all the original (foreign) frags. */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) skb_frag_unref(skb, f); @@ -1611,6 +1606,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) xenvif_fill_frags(queue, skb); if (unlikely(skb_has_frag_list(skb))) { + struct sk_buff *nskb = skb_shinfo(skb)->frag_list; + xenvif_skb_zerocopy_prepare(queue, nskb); if (xenvif_handle_frag_list(queue, skb)) { if (net_ratelimit()) netdev_err(queue->vif->dev, @@ -1619,6 +1616,9 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) kfree_skb(skb); continue; } + /* Copied all the bits from the frag list -- free it. */ + skb_frag_list_init(skb); + kfree_skb(nskb); } skb->dev = queue->vif->dev; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0a4bd73caae5..6f55ab4f7959 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -889,7 +889,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - BUG_ON(pull_to <= skb_headlen(skb)); + BUG_ON(pull_to < skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c index 5291797324ba..553011f58339 100644 --- a/drivers/nfc/nxp-nci/firmware.c +++ b/drivers/nfc/nxp-nci/firmware.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include "nxp-nci.h" diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index df4333c7ee0f..0b1122cb5d0c 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index bdce0679674c..02e6485c1ed5 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1377,7 +1377,7 @@ static struct superio_struct *find_superio(struct parport *p) { int i; for (i = 0; i < NR_SUPERIOS; i++) - if (superios[i].io != p->base) + if (superios[i].io == p->base) return &superios[i]; return NULL; } diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index 99da549d5d06..0118287a8a10 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -40,8 +40,10 @@ #define P2A_INT_ENABLE 0x3070 #define P2A_INT_ENA_ALL 0xf #define RP_LTSSM 0x3c64 +#define RP_LTSSM_MASK 0x1f #define LTSSM_L0 0xf +#define PCIE_CAP_OFFSET 0x80 /* TLP configuration type 0 and 1 */ #define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */ #define TLP_FMTTYPE_CFGWR0 0x44 /* Configuration Write Type 0 */ @@ -60,6 +62,9 @@ #define TLP_LOOP 500 #define RP_DEVFN 0 +#define LINK_UP_TIMEOUT HZ +#define LINK_RETRAIN_TIMEOUT HZ + #define INTX_NUM 4 #define DWORD_MASK 3 @@ -80,25 +85,21 @@ struct tlp_rp_regpair_t { u32 reg1; }; -static void altera_pcie_retrain(struct pci_dev *dev) +static inline void cra_writel(struct altera_pcie *pcie, const u32 value, + const u32 reg) { - u16 linkcap, linkstat; - - /* - * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but - * current speed is 2.5 GB/s. - */ - pcie_capability_read_word(dev, PCI_EXP_LNKCAP, &linkcap); - - if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB) - return; - - pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &linkstat); - if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) - pcie_capability_set_word(dev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_RL); + writel_relaxed(value, pcie->cra_base + reg); +} + +static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) +{ + return readl_relaxed(pcie->cra_base + reg); +} + +static bool altera_pcie_link_is_up(struct altera_pcie *pcie) +{ + return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) == LTSSM_L0); } -DECLARE_PCI_FIXUP_EARLY(0x1172, PCI_ANY_ID, altera_pcie_retrain); /* * Altera PCIe port uses BAR0 of RC's configuration space as the translation @@ -119,17 +120,6 @@ static bool altera_pcie_hide_rc_bar(struct pci_bus *bus, unsigned int devfn, return false; } -static inline void cra_writel(struct altera_pcie *pcie, const u32 value, - const u32 reg) -{ - writel_relaxed(value, pcie->cra_base + reg); -} - -static inline u32 cra_readl(struct altera_pcie *pcie, const u32 reg) -{ - return readl_relaxed(pcie->cra_base + reg); -} - static void tlp_write_tx(struct altera_pcie *pcie, struct tlp_rp_regpair_t *tlp_rp_regdata) { @@ -138,11 +128,6 @@ static void tlp_write_tx(struct altera_pcie *pcie, cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL); } -static bool altera_pcie_link_is_up(struct altera_pcie *pcie) -{ - return !!(cra_readl(pcie, RP_LTSSM) & LTSSM_L0); -} - static bool altera_pcie_valid_config(struct altera_pcie *pcie, struct pci_bus *bus, int dev) { @@ -286,22 +271,14 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, return PCIBIOS_SUCCESSFUL; } -static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *value) +static int _altera_pcie_cfg_read(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int where, int size, + u32 *value) { - struct altera_pcie *pcie = bus->sysdata; int ret; u32 data; u8 byte_en; - if (altera_pcie_hide_rc_bar(bus, devfn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) { - *value = 0xffffffff; - return PCIBIOS_DEVICE_NOT_FOUND; - } - switch (size) { case 1: byte_en = 1 << (where & 3); @@ -314,7 +291,7 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, break; } - ret = tlp_cfg_dword_read(pcie, bus->number, devfn, + ret = tlp_cfg_dword_read(pcie, busno, devfn, (where & ~DWORD_MASK), byte_en, &data); if (ret != PCIBIOS_SUCCESSFUL) return ret; @@ -334,20 +311,14 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } -static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 value) +static int _altera_pcie_cfg_write(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int where, int size, + u32 value) { - struct altera_pcie *pcie = bus->sysdata; u32 data32; u32 shift = 8 * (where & 3); u8 byte_en; - if (altera_pcie_hide_rc_bar(bus, devfn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) - return PCIBIOS_DEVICE_NOT_FOUND; - switch (size) { case 1: data32 = (value & 0xff) << shift; @@ -363,8 +334,40 @@ static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, break; } - return tlp_cfg_dword_write(pcie, bus->number, devfn, - (where & ~DWORD_MASK), byte_en, data32); + return tlp_cfg_dword_write(pcie, busno, devfn, (where & ~DWORD_MASK), + byte_en, data32); +} + +static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *value) +{ + struct altera_pcie *pcie = bus->sysdata; + + if (altera_pcie_hide_rc_bar(bus, devfn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) { + *value = 0xffffffff; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + return _altera_pcie_cfg_read(pcie, bus->number, devfn, where, size, + value); +} + +static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 value) +{ + struct altera_pcie *pcie = bus->sysdata; + + if (altera_pcie_hide_rc_bar(bus, devfn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) + return PCIBIOS_DEVICE_NOT_FOUND; + + return _altera_pcie_cfg_write(pcie, bus->number, devfn, where, size, + value); } static struct pci_ops altera_pcie_ops = { @@ -372,6 +375,90 @@ static struct pci_ops altera_pcie_ops = { .write = altera_pcie_cfg_write, }; +static int altera_read_cap_word(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int offset, u16 *value) +{ + u32 data; + int ret; + + ret = _altera_pcie_cfg_read(pcie, busno, devfn, + PCIE_CAP_OFFSET + offset, sizeof(*value), + &data); + *value = data; + return ret; +} + +static int altera_write_cap_word(struct altera_pcie *pcie, u8 busno, + unsigned int devfn, int offset, u16 value) +{ + return _altera_pcie_cfg_write(pcie, busno, devfn, + PCIE_CAP_OFFSET + offset, sizeof(value), + value); +} + +static void altera_wait_link_retrain(struct altera_pcie *pcie) +{ + u16 reg16; + unsigned long start_jiffies; + + /* Wait for link training end. */ + start_jiffies = jiffies; + for (;;) { + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_LT)) + break; + + if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) { + dev_err(&pcie->pdev->dev, "link retrain timeout\n"); + break; + } + udelay(100); + } + + /* Wait for link is up */ + start_jiffies = jiffies; + for (;;) { + if (altera_pcie_link_is_up(pcie)) + break; + + if (time_after(jiffies, start_jiffies + LINK_UP_TIMEOUT)) { + dev_err(&pcie->pdev->dev, "link up timeout\n"); + break; + } + udelay(100); + } +} + +static void altera_pcie_retrain(struct altera_pcie *pcie) +{ + u16 linkcap, linkstat, linkctl; + + if (!altera_pcie_link_is_up(pcie)) + return; + + /* + * Set the retrain bit if the PCIe rootport support > 2.5GB/s, but + * current speed is 2.5 GB/s. + */ + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKCAP, + &linkcap); + if ((linkcap & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB) + return; + + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, PCI_EXP_LNKSTA, + &linkstat); + if ((linkstat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) { + altera_read_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKCTL, &linkctl); + linkctl |= PCI_EXP_LNKCTL_RL; + altera_write_cap_word(pcie, pcie->root_bus_nr, RP_DEVFN, + PCI_EXP_LNKCTL, linkctl); + + altera_wait_link_retrain(pcie); + } +} + static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq) { @@ -506,6 +593,11 @@ static int altera_pcie_parse_dt(struct altera_pcie *pcie) return 0; } +static void altera_pcie_host_init(struct altera_pcie *pcie) +{ + altera_pcie_retrain(pcie); +} + static int altera_pcie_probe(struct platform_device *pdev) { struct altera_pcie *pcie; @@ -543,6 +635,7 @@ static int altera_pcie_probe(struct platform_device *pdev) cra_writel(pcie, P2A_INT_STS_ALL, P2A_INT_STATUS); /* enable all interrupts */ cra_writel(pcie, P2A_INT_ENA_ALL, P2A_INT_ENABLE); + altera_pcie_host_init(pcie); bus = pci_scan_root_bus(&pdev->dev, pcie->root_bus_nr, &altera_pcie_ops, pcie, &pcie->resources); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 84d501f5ff4e..d85010ebac5a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3623,6 +3623,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, quirk_dma_func1_alias); diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index b505b87661f8..07c4153e6f3d 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -656,7 +656,7 @@ static const char * const sd_a_groups[] = { static const char * const sdxc_a_groups[] = { "sdxc_d0_0_a", "sdxc_d13_0_a", "sdxc_d47_a", "sdxc_clk_a", - "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d0_13_1_a" + "sdxc_cmd_a", "sdxc_d0_1_a", "sdxc_d13_1_a" }; static const char * const pcm_a_groups[] = { diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 8c423020d3d8..217c89b0e27f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -806,11 +806,24 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return ret; } - ret = gpiochip_add_pin_range(&pctrl->chip, dev_name(pctrl->dev), 0, 0, chip->ngpio); - if (ret) { - dev_err(pctrl->dev, "Failed to add pin range\n"); - gpiochip_remove(&pctrl->chip); - return ret; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pctrl->dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&pctrl->chip, + dev_name(pctrl->dev), 0, 0, chip->ngpio); + if (ret) { + dev_err(pctrl->dev, "Failed to add pin range\n"); + gpiochip_remove(&pctrl->chip); + return ret; + } } ret = gpiochip_irqchip_add(chip, diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index a7c81e988656..383977ea3a3c 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -568,7 +568,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 11), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 1)), /* PH_EINT11 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 11)), /* PH_EINT11 */ }; static const struct sunxi_pinctrl_desc sun8i_a83t_pinctrl_data = { diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c index 957fa907beb3..f0b9b46047be 100644 --- a/drivers/platform/goldfish/goldfish_pipe_v2.c +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -697,7 +697,10 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) return id; { - /* Reallocate the array */ + /* Reallocate the array. + * Since get_free_pipe_id_locked runs with interrupts disabled, + * we don't want to make calls that could lead to sleep. + */ u32 new_capacity = 2 * dev->pipes_capacity; struct goldfish_pipe **pipes = kcalloc(new_capacity, sizeof(*pipes), diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 988ebe9a6b90..953974b5a9a9 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -881,6 +881,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 852d2de7f69f..a284a2b42bcd 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -339,8 +339,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, - { KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */ - { KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ + { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, { KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index f96f7b865267..7c1defaef3f5 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2084,7 +2084,8 @@ static int asus_wmi_add(struct platform_device *pdev) err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) goto fail_backlight; - } + } else + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); status = wmi_install_notify_handler(asus->driver->event_guid, asus_wmi_notify, asus); diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 9e29b1321648..15783869e1a0 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -427,14 +427,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; - val->intval = (s16)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (s16)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_TEMP_AMBIENT: ret = olpc_ec_cmd(EC_AMB_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) return ret; - val->intval = (int)be16_to_cpu(ec_word) * 100 / 256; + val->intval = (int)be16_to_cpu(ec_word) * 10 / 256; break; case POWER_SUPPLY_PROP_CHARGE_COUNTER: ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2); diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 4eb254a273f8..4861cfddcdd3 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -204,7 +204,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; - ptp->info->gettime64(ptp->info, &ts); + err = ptp->info->gettime64(ptp->info, &ts); + if (err) + goto out; pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; @@ -257,6 +259,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) break; } +out: kfree(sysoff); return err; } diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index 6e9592c2e1e5..f419f6ee8144 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -131,7 +131,7 @@ * ACT8865 voltage number */ #define ACT8865_VOLTAGE_NUM 64 -#define ACT8600_SUDCDC_VOLTAGE_NUM 255 +#define ACT8600_SUDCDC_VOLTAGE_NUM 256 struct act8865 { struct regmap *regmap; @@ -154,7 +154,8 @@ static const struct regulator_linear_range act8600_sudcdc_voltage_ranges[] = { REGULATOR_LINEAR_RANGE(3000000, 0, 63, 0), REGULATOR_LINEAR_RANGE(3000000, 64, 159, 100000), REGULATOR_LINEAR_RANGE(12600000, 160, 191, 200000), - REGULATOR_LINEAR_RANGE(19000000, 191, 255, 400000), + REGULATOR_LINEAR_RANGE(19000000, 192, 247, 400000), + REGULATOR_LINEAR_RANGE(41400000, 248, 255, 0), }; static struct regulator_ops act8865_ops = { diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c index 92f88753bfed..2daf751c26c7 100644 --- a/drivers/regulator/s2mpa01.c +++ b/drivers/regulator/s2mpa01.c @@ -303,13 +303,13 @@ static const struct regulator_desc regulators[] = { regulator_desc_ldo(2, STEP_50_MV), regulator_desc_ldo(3, STEP_50_MV), regulator_desc_ldo(4, STEP_50_MV), - regulator_desc_ldo(5, STEP_50_MV), + regulator_desc_ldo(5, STEP_25_MV), regulator_desc_ldo(6, STEP_25_MV), regulator_desc_ldo(7, STEP_50_MV), regulator_desc_ldo(8, STEP_50_MV), regulator_desc_ldo(9, STEP_50_MV), regulator_desc_ldo(10, STEP_50_MV), - regulator_desc_ldo(11, STEP_25_MV), + regulator_desc_ldo(11, STEP_50_MV), regulator_desc_ldo(12, STEP_50_MV), regulator_desc_ldo(13, STEP_50_MV), regulator_desc_ldo(14, STEP_50_MV), @@ -320,11 +320,11 @@ static const struct regulator_desc regulators[] = { regulator_desc_ldo(19, STEP_50_MV), regulator_desc_ldo(20, STEP_50_MV), regulator_desc_ldo(21, STEP_50_MV), - regulator_desc_ldo(22, STEP_25_MV), - regulator_desc_ldo(23, STEP_25_MV), + regulator_desc_ldo(22, STEP_50_MV), + regulator_desc_ldo(23, STEP_50_MV), regulator_desc_ldo(24, STEP_50_MV), regulator_desc_ldo(25, STEP_50_MV), - regulator_desc_ldo(26, STEP_50_MV), + regulator_desc_ldo(26, STEP_25_MV), regulator_desc_buck1_4(1), regulator_desc_buck1_4(2), regulator_desc_buck1_4(3), diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index b6d831b84e1d..47694dd515ab 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -372,7 +372,7 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(32, STEP_50_MV), regulator_desc_s2mps11_ldo(33, STEP_50_MV), regulator_desc_s2mps11_ldo(34, STEP_50_MV), - regulator_desc_s2mps11_ldo(35, STEP_50_MV), + regulator_desc_s2mps11_ldo(35, STEP_25_MV), regulator_desc_s2mps11_ldo(36, STEP_50_MV), regulator_desc_s2mps11_ldo(37, STEP_50_MV), regulator_desc_s2mps11_ldo(38, STEP_50_MV), @@ -382,8 +382,8 @@ static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_12_5_MV), regulator_desc_s2mps11_buck9, regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index e1a10232a943..e44872fb9e5e 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct rproc *rproc = vdev_to_rproc(vdev); int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 73354ee27877..1fcd27c1f183 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, static int rpmsg_probe(struct virtio_device *vdev) { vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; - const char *names[] = { "input", "output" }; + static const char * const names[] = { "input", "output" }; struct virtqueue *vqs[2]; struct virtproc_info *vrp; void *bufs_va; diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index e6bfb9c42a10..5b136bdc03d4 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -52,13 +52,11 @@ EXPORT_SYMBOL(rtc_year_days); */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - unsigned int month, year; - unsigned long secs; + unsigned int month, year, secs; int days; /* time must be positive */ - days = div_s64(time, 86400); - secs = time - (unsigned int) days * 86400; + days = div_s64_rem(time, 86400, &secs); /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index a161fbf6f172..63ad5b543f14 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -47,49 +47,83 @@ struct snvs_rtc_data { struct clk *clk; }; +/* Read 64 bit timer register, which could be in inconsistent state */ +static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) +{ + u32 msb, lsb; + + regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &msb); + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &lsb); + return (u64)msb << 32 | lsb; +} + +/* Read the secure real time counter, taking care to deal with the cases of the + * counter updating while being read. + */ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) { u64 read1, read2; - u32 val; + unsigned int timeout = 100; + /* As expected, the registers might update between the read of the LSB + * reg and the MSB reg. It's also possible that one register might be + * in partially modified state as well. + */ + read1 = rtc_read_lpsrt(data); do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read1 = val; - read1 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read1 |= val; - - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read2 = val; - read2 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read2 |= val; - } while (read1 != read2); + read2 = read1; + read1 = rtc_read_lpsrt(data); + } while (read1 != read2 && --timeout); + if (!timeout) + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); /* Convert 47-bit counter to 32-bit raw second count */ return (u32) (read1 >> CNTR_TO_SECS_SH); } -static void rtc_write_sync_lp(struct snvs_rtc_data *data) +/* Just read the lsb from the counter, dealing with inconsistent state */ +static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) { - u32 count1, count2, count3; - int i; + u32 count1, count2; + unsigned int timeout = 100; - /* Wait for 3 CKIL cycles */ - for (i = 0; i < 3; i++) { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - } while (count1 != count2); - - /* Now wait until counter value changes */ - do { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count3); - } while (count2 != count3); - } while (count3 == count1); + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + do { + count2 = count1; + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + } while (count1 != count2 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); + return -ETIMEDOUT; } + + *lsb = count1; + return 0; +} + +static int rtc_write_sync_lp(struct snvs_rtc_data *data) +{ + u32 count1, count2; + u32 elapsed; + unsigned int timeout = 1000; + int ret; + + ret = rtc_read_lp_counter_lsb(data, &count1); + if (ret) + return ret; + + /* Wait for 3 CKIL cycles, about 61.0-91.5 µs */ + do { + ret = rtc_read_lp_counter_lsb(data, &count2); + if (ret) + return ret; + elapsed = count2 - count1; /* wrap around _is_ handled! */ + } while (elapsed < 3 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout waiting for LPSRT Counter to change\n"); + return -ETIMEDOUT; + } + return 0; } static int snvs_rtc_enable(struct snvs_rtc_data *data, bool enable) @@ -173,9 +207,7 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN), enable ? (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN) : 0); - rtc_write_sync_lp(data); - - return 0; + return rtc_write_sync_lp(data); } static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) @@ -183,10 +215,14 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct snvs_rtc_data *data = dev_get_drvdata(dev); struct rtc_time *alrm_tm = &alrm->time; unsigned long time; + int ret; rtc_tm_to_time(alrm_tm, &time); regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0); + ret = rtc_write_sync_lp(data); + if (ret) + return ret; regmap_write(data->regmap, data->offset + SNVS_LPTAR, time); /* Clear alarm interrupt status bit */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index dac2f6883e28..80a43074c2f9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4023,6 +4023,14 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) usrparm.psf_data &= 0x7fffffffULL; usrparm.rssd_result &= 0x7fffffffULL; } + /* at least 2 bytes are accessed and should be allocated */ + if (usrparm.psf_data_len < 2) { + DBF_DEV_EVENT(DBF_WARNING, device, + "Symmetrix ioctl invalid data length %d", + usrparm.psf_data_len); + rc = -EINVAL; + goto out; + } /* alloc I/O data area */ psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 944156207477..dcb949dcfa66 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -43,7 +43,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work) static void __ref sclp_cpu_change_notify(struct work_struct *work) { + lock_device_hotplug(); smp_rescan_cpus(); + unlock_device_hotplug(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 533bd2467910..b40604d0126f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2452,11 +2452,12 @@ out: return rc; } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return; + qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2529,10 +2530,8 @@ out_freeoutqbufs: card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2565,10 +2564,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; } diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 38c8e308d4c8..a96c98e3fc73 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -275,16 +275,16 @@ static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter) */ int zfcp_status_read_refill(struct zfcp_adapter *adapter) { - while (atomic_read(&adapter->stat_miss) > 0) + while (atomic_add_unless(&adapter->stat_miss, -1, 0)) if (zfcp_fsf_status_read(adapter->qdio)) { + atomic_inc(&adapter->stat_miss); /* undo add -1 */ if (atomic_read(&adapter->stat_miss) >= adapter->stat_read_buf_num) { zfcp_erp_adapter_reopen(adapter, 0, "axsref1"); return 1; } break; - } else - atomic_dec(&adapter->stat_miss); + } return 0; } diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 2abcd331b05d..abe460eac712 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -652,6 +652,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) add_timer(&erp_action->timer); } +void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag) +{ + unsigned long flags; + struct zfcp_port *port; + + write_lock_irqsave(&adapter->erp_lock, flags); + read_lock(&adapter->port_list_lock); + list_for_each_entry(port, &adapter->port_list, list) + _zfcp_erp_port_forced_reopen(port, clear, dbftag); + read_unlock(&adapter->port_list_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear, char *id) { @@ -1306,6 +1320,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); int lun_status; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL) + continue; if (zsdev->port != port) continue; /* LUN under port of interest */ diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index b326f05c7f89..a39a74500e23 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -68,6 +68,8 @@ extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32); extern int zfcp_erp_port_reopen(struct zfcp_port *, int, char *); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); +extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32); extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 3afb200b2829..bdb257eaa2e5 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -326,6 +326,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; int ret = SUCCESS, fc_ret; + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) { + zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p"); + zfcp_erp_wait(adapter); + } zfcp_erp_adapter_reopen(adapter, 0, "schrh_1"); zfcp_erp_wait(adapter); fc_ret = fc_block_scsi_eh(scpnt); diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 53fb975c404b..1d060fd293a3 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index ff06bdfd2b20..5abd37ce4f6f 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -283,6 +283,8 @@ static void virtio_ccw_drop_indicators(struct virtio_ccw_device *vcdev) { struct virtio_ccw_vq_info *info; + if (!vcdev->airq_info) + return; list_for_each_entry(info, &vcdev->virtqueues, node) drop_airq_indicator(info->vq, vcdev->airq_info); } @@ -423,7 +425,7 @@ static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); if (ret) return ret; - return vcdev->config_block->num; + return vcdev->config_block->num ?: -ENOENT; } static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) @@ -639,7 +641,7 @@ out: static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index 33fbe8249fd5..044cffbc45e8 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -221,6 +221,7 @@ static int d7s_probe(struct platform_device *op) dev_set_drvdata(&op->dev, p); d7s_device = p; err = 0; + of_node_put(opts); out: return err; diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 5609b602c54d..baa9b322520b 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -910,8 +910,10 @@ static void envctrl_init_i2c_child(struct device_node *dp, for (len = 0; len < PCF8584_MAX_CHANNELS; ++len) { pchild->mon_type[len] = ENVCTRL_NOMON; } + of_node_put(root_node); return; } + of_node_put(root_node); } /* Get the monitor channels. */ diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index d0b227ffbd5f..573aeec7a02b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2279,7 +2279,7 @@ static int _bnx2fc_create(struct net_device *netdev, if (!interface) { printk(KERN_ERR PFX "bnx2fc_interface_create failed\n"); rc = -ENOMEM; - goto ifput_err; + goto netdev_err; } if (netdev->priv_flags & IFF_802_1Q_VLAN) { diff --git a/drivers/scsi/csiostor/csio_attr.c b/drivers/scsi/csiostor/csio_attr.c index 2d1c4ebd40f9..6587f20cff1a 100644 --- a/drivers/scsi/csiostor/csio_attr.c +++ b/drivers/scsi/csiostor/csio_attr.c @@ -582,12 +582,12 @@ csio_vport_create(struct fc_vport *fc_vport, bool disable) } fc_vport_set_state(fc_vport, FC_VPORT_INITIALIZING); + ln->fc_vport = fc_vport; if (csio_fcoe_alloc_vnp(hw, ln)) goto error; *(struct csio_lnode **)fc_vport->dd_data = ln; - ln->fc_vport = fc_vport; if (!fc_vport->node_name) fc_vport->node_name = wwn_to_u64(csio_ln_wwnn(ln)); if (!fc_vport->port_name) diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 77128d680e3b..6f38fa1f468a 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -595,6 +595,13 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) shost->max_lun = ~0; shost->max_cmd_len = MAX_COMMAND_SIZE; + /* turn on DIF support */ + scsi_host_set_prot(shost, + SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIF_TYPE3_PROTECTION); + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + err = scsi_add_host(shost, &pdev->dev); if (err) goto err_shost; @@ -682,13 +689,6 @@ static int isci_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_host_alloc; } pci_info->hosts[i] = h; - - /* turn on DIF support */ - scsi_host_set_prot(to_shost(h), - SHOST_DIF_TYPE1_PROTECTION | - SHOST_DIF_TYPE2_PROTECTION | - SHOST_DIF_TYPE3_PROTECTION); - scsi_host_set_guard(to_shost(h), SHOST_DIX_GUARD_CRC); } err = isci_setup_interrupts(pdev); diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index e01a29863c38..867fc036d6ef 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1739,14 +1739,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1756,7 +1756,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; } if (mfs <= lport->mfs) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index a74f8fbefd33..0fdc8c417035 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1448,7 +1448,13 @@ static int iscsi_xmit_task(struct iscsi_conn *conn) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) return -ENODATA; + spin_lock_bh(&conn->session->back_lock); + if (conn->task == NULL) { + spin_unlock_bh(&conn->session->back_lock); + return -ENODATA; + } __iscsi_get_task(task); + spin_unlock_bh(&conn->session->back_lock); spin_unlock_bh(&conn->session->frwd_lock); rc = conn->session->tt->xmit_task(task); spin_lock_bh(&conn->session->frwd_lock); @@ -2416,8 +2422,8 @@ int iscsi_eh_session_reset(struct scsi_cmnd *sc) failed: ISCSI_DBG_EH(session, "failing session reset: Could not log back into " - "%s, %s [age %d]\n", session->targetname, - conn->persistent_address, session->age); + "%s [age %d]\n", session->targetname, + session->age); spin_unlock_bh(&session->frwd_lock); mutex_unlock(&session->eh_mutex); return FAILED; diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 12886f96b286..7be581f7c35d 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -818,6 +818,7 @@ static struct domain_device *sas_ex_discover_end_dev( rphy = sas_end_device_alloc(phy->port); if (!rphy) goto out_free; + rphy->identify.phy_identifier = phy_id; child->rphy = rphy; get_device(&rphy->dev); @@ -845,6 +846,7 @@ static struct domain_device *sas_ex_discover_end_dev( child->rphy = rphy; get_device(&rphy->dev); + rphy->identify.phy_identifier = phy_id; sas_fill_in_rphy(child, rphy); list_add_tail(&child->disco_list_node, &parent->port->disco_list); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index fd8fe1202dbe..398c9a0a5ade 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5105,6 +5105,9 @@ error: stat = (struct ls_rjt *)(pcmd + sizeof(uint32_t)); stat->un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + if (shdr_add_status == ADD_STATUS_OPERATION_ALREADY_ACTIVE) + stat->un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; phba->fc_stat.elsXmitLSRJT++; rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index ac7acd257c99..2422094f1f15 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3847,6 +3847,7 @@ int megasas_alloc_cmds(struct megasas_instance *instance) if (megasas_create_frame_pool(instance)) { dev_printk(KERN_DEBUG, &instance->pdev->dev, "Error creating frame DMA pool\n"); megasas_free_cmds(instance); + return -ENOMEM; } return 0; diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 741509b35617..14f32c114c55 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -1273,7 +1273,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map, for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) { ld = MR_TargetIdToLdGet(ldCount, drv_map); - if (ld >= MAX_LOGICAL_DRIVES_EXT) { + if (ld >= MAX_LOGICAL_DRIVES_EXT - 1) { lbInfo[ldCount].loadBalanceFlag = 0; continue; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 213944ed64d9..3d3bfa814093 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1758,7 +1758,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, device_id < instance->fw_supported_vd_count)) { ld = MR_TargetIdToLdGet(device_id, local_map_ptr); - if (ld >= instance->fw_supported_vd_count) + if (ld >= instance->fw_supported_vd_count - 1) fp_possible = 0; raid = MR_LdRaidGet(ld, local_map_ptr); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d8c03431d0aa..f9f899ec9427 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -7245,6 +7245,8 @@ static int qla4xxx_sysfs_ddb_tgt_create(struct scsi_qla_host *ha, rc = qla4xxx_copy_from_fwddb_param(fnode_sess, fnode_conn, fw_ddb_entry); + if (rc) + goto free_sess; ql4_printk(KERN_INFO, ha, "%s: sysfs entry %s created\n", __func__, fnode_sess->dev.kobj.name); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 850ddc5fac04..3e2288af56bc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -217,7 +217,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, extern void scsi_requeue_run_queue(struct work_struct *work); sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, - GFP_ATOMIC); + GFP_KERNEL); if (!sdev) goto out; @@ -791,7 +791,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, */ sdev->inquiry = kmemdup(inq_result, max_t(size_t, sdev->inquiry_len, 36), - GFP_ATOMIC); + GFP_KERNEL); if (sdev->inquiry == NULL) return SCSI_SCAN_NO_RESPONSE; @@ -1085,7 +1085,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, if (!sdev) goto out; - result = kmalloc(result_len, GFP_ATOMIC | + result = kmalloc(result_len, GFP_KERNEL | ((shost->unchecked_isa_dma) ? __GFP_DMA : 0)); if (!result) goto out_free_sdev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6fffb73766de..62adaca8fb97 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -207,6 +207,12 @@ cache_type_store(struct device *dev, struct device_attribute *attr, sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; + /* + * Ensure WP, DPOFUA, and RESERVED fields are cleared in + * received mode parameter buffer before doing MODE SELECT. + */ + data.device_specific = 0; + if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, SD_MAX_RETRIES, &data, &sshdr)) { if (scsi_sense_valid(&sshdr)) @@ -1270,11 +1276,6 @@ static void sd_release(struct gendisk *disk, fmode_t mode) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } - /* - * XXX and what if there are packets in flight and this close() - * XXX is followed by a "rmmod sd_mod"? - */ - scsi_disk_put(sdkp); } @@ -3221,11 +3222,23 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - + struct request_queue *q = disk->queue; + spin_lock(&sd_index_lock); ida_remove(&sd_index_ida, sdkp->index); spin_unlock(&sd_index_lock); + /* + * Wait until all requests that are in progress have completed. + * This is necessary to avoid that e.g. scsi_end_request() crashes + * due to clearing the disk->private_data pointer. Wait from inside + * scsi_disk_release() instead of from sd_release() to avoid that + * freezing and unfreezing the request queue affects user space I/O + * in case multiple processes open a /dev/sd... node concurrently. + */ + blk_mq_freeze_queue(q); + blk_mq_unfreeze_queue(q); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 03054c0e7689..3c3e8115f73d 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -187,30 +187,25 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; - struct request_sense *sense = cgc->sense; + unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE]; SDev = cd->device; - if (!sense) { - sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL); - if (!sense) { - err = -ENOMEM; - goto out; - } - } - retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; goto out; } - memset(sense, 0, sizeof(*sense)); + memset(sense_buffer, 0, sizeof(sense_buffer)); result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, (char *)sense, + cgc->buffer, cgc->buflen, sense_buffer, cgc->timeout, IOCTL_RETRIES, 0, NULL); - scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr); + scsi_normalize_sense(sense_buffer, sizeof(sense_buffer), &sshdr); + + if (cgc->sense) + memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense)); /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { @@ -261,8 +256,6 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) /* Wake up a process waiting for device */ out: - if (!cgc->sense) - kfree(sense); cgc->stat = err; return err; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 2e522951b619..088a68ab4246 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4821,9 +4821,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp, current->mm, uaddr, nr_pages, - rw == READ, - 0, /* don't force */ - pages); + pages, + rw == READ ? FOLL_WRITE : 0); /* don't force */ /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8ef905cbfc9c..9237427728ce 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -692,7 +692,6 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc) return FAILED; memset(cmd, 0, sizeof(*cmd)); - cmd->sc = sc; cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ .type = VIRTIO_SCSI_T_TMF, .subtype = cpu_to_virtio32(vscsi->vdev, @@ -751,7 +750,6 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return FAILED; memset(cmd, 0, sizeof(*cmd)); - cmd->sc = sc; cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){ .type = VIRTIO_SCSI_T_TMF, .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK, diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 0de2f9069e23..23081ed8f1e3 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1199,8 +1199,6 @@ static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter) static void pvscsi_release_resources(struct pvscsi_adapter *adapter) { - pvscsi_shutdown_intr(adapter); - if (adapter->workqueue) destroy_workqueue(adapter->workqueue); @@ -1529,6 +1527,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) out_reset_adapter: ll_adapter_reset(adapter); out_release_resources: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); scsi_host_put(host); out_disable_device: @@ -1537,6 +1536,7 @@ out_disable_device: return error; out_release_resources_and_disable: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); goto out_disable_device; } diff --git a/drivers/soc/qcom/qcom_gsbi.c b/drivers/soc/qcom/qcom_gsbi.c index 09c669e70d63..038abc377fdb 100644 --- a/drivers/soc/qcom/qcom_gsbi.c +++ b/drivers/soc/qcom/qcom_gsbi.c @@ -138,7 +138,7 @@ static int gsbi_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; struct gsbi_info *gsbi; - int i; + int i, ret; u32 mask, gsbi_num; const struct crci_config *config = NULL; @@ -221,7 +221,10 @@ static int gsbi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, gsbi); - return of_platform_populate(node, NULL, NULL, &pdev->dev); + ret = of_platform_populate(node, NULL, NULL, &pdev->dev); + if (ret) + clk_disable_unprepare(gsbi->hclk); + return ret; } static int gsbi_remove(struct platform_device *pdev) diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index cd8f41351add..7bfb154d6fa5 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -22,11 +22,15 @@ static const struct of_device_id tegra_machine_match[] = { bool soc_is_tegra(void) { + const struct of_device_id *match; struct device_node *root; root = of_find_node_by_path("/"); if (!root) return false; - return of_match_node(tegra_machine_match, root) != NULL; + match = of_match_node(tegra_machine_match, root); + of_node_put(root); + + return match != NULL; } diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index de2c1bfe28b5..c4f5e5bbb8dc 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -131,13 +131,17 @@ static int tegra_fuse_probe(struct platform_device *pdev) /* take over the memory region from the early initialization */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fuse->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(fuse->base)) - return PTR_ERR(fuse->base); + if (IS_ERR(fuse->base)) { + err = PTR_ERR(fuse->base); + fuse->base = base; + return err; + } fuse->clk = devm_clk_get(&pdev->dev, "fuse"); if (IS_ERR(fuse->clk)) { dev_err(&pdev->dev, "failed to get FUSE clock: %ld", PTR_ERR(fuse->clk)); + fuse->base = base; return PTR_ERR(fuse->clk); } @@ -146,8 +150,10 @@ static int tegra_fuse_probe(struct platform_device *pdev) if (fuse->soc->probe) { err = fuse->soc->probe(fuse); - if (err < 0) + if (err < 0) { + fuse->base = base; return err; + } } if (tegra_fuse_create_sysfs(&pdev->dev, fuse->soc->info->size, diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index cf04960cc3e6..1a1368f5863c 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -88,7 +88,7 @@ struct bcm2835_spi { u8 *rx_buf; int tx_len; int rx_len; - bool dma_pending; + unsigned int dma_pending; }; static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg) @@ -155,8 +155,7 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id) /* Write as many bytes as possible to FIFO */ bcm2835_wr_fifo(bs); - /* based on flags decide if we can finish the transfer */ - if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) { + if (!bs->rx_len) { /* Transfer complete - reset SPI HW */ bcm2835_spi_reset_hw(master); /* wake up the framework */ @@ -233,10 +232,9 @@ static void bcm2835_spi_dma_done(void *data) * is called the tx-dma must have finished - can't get to this * situation otherwise... */ - dmaengine_terminate_all(master->dma_tx); - - /* mark as no longer pending */ - bs->dma_pending = 0; + if (cmpxchg(&bs->dma_pending, true, false)) { + dmaengine_terminate_all(master->dma_tx); + } /* and mark as completed */; complete(&master->xfer_completion); @@ -342,6 +340,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master, if (ret) { /* need to reset on errors */ dmaengine_terminate_all(master->dma_tx); + bs->dma_pending = false; bcm2835_spi_reset_hw(master); return ret; } @@ -617,10 +616,9 @@ static void bcm2835_spi_handle_err(struct spi_master *master, struct bcm2835_spi *bs = spi_master_get_devdata(master); /* if an error occurred and we have an active dma, then terminate */ - if (bs->dma_pending) { + if (cmpxchg(&bs->dma_pending, true, false)) { dmaengine_terminate_all(master->dma_tx); dmaengine_terminate_all(master->dma_rx); - bs->dma_pending = 0; } /* and reset */ bcm2835_spi_reset_hw(master); diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 13a9b4c42b26..ac331c898f5e 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -321,8 +321,9 @@ struct ion_heap *ion_heap_create(struct ion_platform_heap *heap_data) switch (heap_data->type) { case ION_HEAP_TYPE_SYSTEM_CONTIG: - heap = ion_system_contig_heap_create(heap_data); - break; + pr_err("%s: Heap type is disabled: %d\n", __func__, + heap_data->type); + return ERR_PTR(-EINVAL); case ION_HEAP_TYPE_SYSTEM: heap = ion_system_heap_create(heap_data); break; @@ -361,7 +362,8 @@ void ion_heap_destroy(struct ion_heap *heap) switch (heap->type) { case ION_HEAP_TYPE_SYSTEM_CONTIG: - ion_system_contig_heap_destroy(heap); + pr_err("%s: Heap type is disabled: %d\n", __func__, + heap->type); break; case ION_HEAP_TYPE_SYSTEM: ion_system_heap_destroy(heap); diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 6a8d07915c25..a7f1a3fafef6 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #define CREATE_TRACE_POINTS #include "trace/lowmemorykiller.h" @@ -70,6 +73,157 @@ static unsigned long lowmem_deathpending_timeout; pr_info(x); \ } while (0) + +static DECLARE_WAIT_QUEUE_HEAD(event_wait); +static DEFINE_SPINLOCK(lmk_event_lock); +static struct circ_buf event_buffer; +#define MAX_BUFFERED_EVENTS 8 +#define MAX_TASKNAME 128 + +struct lmk_event { + char taskname[MAX_TASKNAME]; + pid_t pid; + uid_t uid; + pid_t group_leader_pid; + unsigned long min_flt; + unsigned long maj_flt; + unsigned long rss_in_pages; + short oom_score_adj; + short min_score_adj; + unsigned long long start_time; + struct list_head list; +}; + +void handle_lmk_event(struct task_struct *selected, short min_score_adj) +{ + int head; + int tail; + struct lmk_event *events; + struct lmk_event *event; + int res; + long rss_in_pages = -1; + struct mm_struct *mm = get_task_mm(selected); + + if (mm) { + rss_in_pages = get_mm_rss(mm); + mmput(mm); + } + + spin_lock(&lmk_event_lock); + + head = event_buffer.head; + tail = READ_ONCE(event_buffer.tail); + + /* Do not continue to log if no space remains in the buffer. */ + if (CIRC_SPACE(head, tail, MAX_BUFFERED_EVENTS) < 1) { + spin_unlock(&lmk_event_lock); + return; + } + + events = (struct lmk_event *) event_buffer.buf; + event = &events[head]; + + res = get_cmdline(selected, event->taskname, MAX_TASKNAME - 1); + + /* No valid process name means this is definitely not associated with a + * userspace activity. + */ + + if (res <= 0 || res >= MAX_TASKNAME) { + spin_unlock(&lmk_event_lock); + return; + } + + event->taskname[res] = '\0'; + event->pid = selected->pid; + event->uid = from_kuid_munged(current_user_ns(), task_uid(selected)); + if (selected->group_leader) + event->group_leader_pid = selected->group_leader->pid; + else + event->group_leader_pid = -1; + event->min_flt = selected->min_flt; + event->maj_flt = selected->maj_flt; + event->oom_score_adj = selected->signal->oom_score_adj; + event->start_time = nsec_to_clock_t(selected->real_start_time); + event->rss_in_pages = rss_in_pages; + event->min_score_adj = min_score_adj; + + event_buffer.head = (head + 1) & (MAX_BUFFERED_EVENTS - 1); + + spin_unlock(&lmk_event_lock); + + wake_up_interruptible(&event_wait); +} + +static int lmk_event_show(struct seq_file *s, void *unused) +{ + struct lmk_event *events = (struct lmk_event *) event_buffer.buf; + int head; + int tail; + struct lmk_event *event; + + spin_lock(&lmk_event_lock); + + head = event_buffer.head; + tail = event_buffer.tail; + + if (head == tail) { + spin_unlock(&lmk_event_lock); + return -EAGAIN; + } + + event = &events[tail]; + + seq_printf(s, "%lu %lu %lu %lu %lu %lu %hd %hd %llu\n%s\n", + (unsigned long) event->pid, (unsigned long) event->uid, + (unsigned long) event->group_leader_pid, event->min_flt, + event->maj_flt, event->rss_in_pages, event->oom_score_adj, + event->min_score_adj, event->start_time, event->taskname); + + event_buffer.tail = (tail + 1) & (MAX_BUFFERED_EVENTS - 1); + + spin_unlock(&lmk_event_lock); + return 0; +} + +static unsigned int lmk_event_poll(struct file *file, poll_table *wait) +{ + int ret = 0; + + poll_wait(file, &event_wait, wait); + spin_lock(&lmk_event_lock); + if (event_buffer.head != event_buffer.tail) + ret = POLLIN; + spin_unlock(&lmk_event_lock); + return ret; +} + +static int lmk_event_open(struct inode *inode, struct file *file) +{ + return single_open(file, lmk_event_show, inode->i_private); +} + +static const struct file_operations event_file_ops = { + .open = lmk_event_open, + .poll = lmk_event_poll, + .read = seq_read +}; + +static void lmk_event_init(void) +{ + struct proc_dir_entry *entry; + + event_buffer.head = 0; + event_buffer.tail = 0; + event_buffer.buf = kmalloc( + sizeof(struct lmk_event) * MAX_BUFFERED_EVENTS, GFP_KERNEL); + if (!event_buffer.buf) + return; + entry = proc_create("lowmemorykiller", 0, NULL, &event_file_ops); + if (!entry) + pr_err("error creating kernel lmk event file\n"); +} + static unsigned long lowmem_count(struct shrinker *s, struct shrink_control *sc) { @@ -191,6 +345,8 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) free); lowmem_deathpending_timeout = jiffies + HZ; rem += selected_tasksize; + + handle_lmk_event(selected, min_score_adj); } lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", @@ -208,6 +364,7 @@ static struct shrinker lowmem_shrinker = { static int __init lowmem_init(void) { register_shrinker(&lowmem_shrinker); + lmk_event_init(); return 0; } device_initcall(lowmem_init); diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 95b537a8ecdb..6778e2b73667 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -472,10 +472,8 @@ static int ni6501_alloc_usb_buffers(struct comedi_device *dev) size = le16_to_cpu(devpriv->ep_tx->wMaxPacketSize); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } @@ -527,6 +525,9 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (!devpriv) return -ENOMEM; + mutex_init(&devpriv->mut); + usb_set_intfdata(intf, devpriv); + ret = ni6501_find_endpoints(dev); if (ret) return ret; @@ -535,9 +536,6 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (ret) return ret; - mutex_init(&devpriv->mut); - usb_set_intfdata(intf, devpriv); - ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 8c7393ef762d..95e53cfd76a4 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -691,10 +691,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) size = le16_to_cpu(devpriv->ep_tx->wMaxPacketSize); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } @@ -809,6 +807,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, devpriv->model = board->model; + sema_init(&devpriv->limit_sem, 8); + ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; @@ -817,8 +817,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, if (ret) return ret; - sema_init(&devpriv->limit_sem, 8); - usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8055_MODEL) diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index 35acb1a4669b..db8390022732 100644 --- a/drivers/staging/iio/adc/ad7280a.c +++ b/drivers/staging/iio/adc/ad7280a.c @@ -250,7 +250,9 @@ static int ad7280_read(struct ad7280_state *st, unsigned devaddr, if (ret) return ret; - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -288,7 +290,9 @@ static int ad7280_read_channel(struct ad7280_state *st, unsigned devaddr, ad7280_delay(st); - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -321,7 +325,9 @@ static int ad7280_read_all_channels(struct ad7280_state *st, unsigned cnt, ad7280_delay(st); for (i = 0; i < cnt; i++) { - __ad7280_read32(st, &tmp); + ret = __ad7280_read32(st, &tmp); + if (ret) + return ret; if (ad7280_check_crc(st, tmp)) return -EIO; @@ -364,7 +370,10 @@ static int ad7280_chain_setup(struct ad7280_state *st) return ret; for (n = 0; n <= AD7280A_MAX_CHAIN; n++) { - __ad7280_read32(st, &val); + ret = __ad7280_read32(st, &val); + if (ret) + return ret; + if (val == 0) return n - 1; diff --git a/drivers/staging/iio/adc/ad7780.c b/drivers/staging/iio/adc/ad7780.c index 3abc7789237f..531338ea5eb4 100644 --- a/drivers/staging/iio/adc/ad7780.c +++ b/drivers/staging/iio/adc/ad7780.c @@ -90,12 +90,16 @@ static int ad7780_read_raw(struct iio_dev *indio_dev, long m) { struct ad7780_state *st = iio_priv(indio_dev); + int voltage_uv; switch (m) { case IIO_CHAN_INFO_RAW: return ad_sigma_delta_single_conversion(indio_dev, chan, val); case IIO_CHAN_INFO_SCALE: - *val = st->int_vref_mv * st->gain; + voltage_uv = regulator_get_voltage(st->reg); + if (voltage_uv < 0) + return voltage_uv; + *val = (voltage_uv / 1000) * st->gain; *val2 = chan->scan_type.realbits - 1; return IIO_VAL_FRACTIONAL_LOG2; case IIO_CHAN_INFO_OFFSET: diff --git a/drivers/staging/iio/resolver/ad2s90.c b/drivers/staging/iio/resolver/ad2s90.c index 5b1c0db33e7f..b44253eb62ec 100644 --- a/drivers/staging/iio/resolver/ad2s90.c +++ b/drivers/staging/iio/resolver/ad2s90.c @@ -86,7 +86,12 @@ static int ad2s90_probe(struct spi_device *spi) /* need 600ns between CS and the first falling edge of SCLK */ spi->max_speed_hz = 830000; spi->mode = SPI_MODE_3; - spi_setup(spi); + ret = spi_setup(spi); + + if (ret < 0) { + dev_err(&spi->dev, "spi_setup failed!\n"); + return ret; + } return 0; } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index ecfe73302350..46a24b4ead09 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -2621,8 +2621,8 @@ ksocknal_enumerate_interfaces(ksock_net_t *net) net->ksnn_interfaces[j].ksni_ipaddr = ip; net->ksnn_interfaces[j].ksni_netmask = mask; - strncpy(&net->ksnn_interfaces[j].ksni_name[0], - names[i], IFNAMSIZ); + strlcpy(net->ksnn_interfaces[j].ksni_name, + names[i], sizeof(net->ksnn_interfaces[j].ksni_name)); j++; } @@ -2805,8 +2805,9 @@ ksocknal_startup(lnet_ni_t *ni) goto fail_1; } - strncpy(&net->ksnn_interfaces[i].ksni_name[0], - ni->ni_interfaces[i], IFNAMSIZ); + strlcpy(net->ksnn_interfaces[i].ksni_name, + ni->ni_interfaces[i], + sizeof(net->ksnn_interfaces[i].ksni_name)); } net->ksnn_ninterfaces = i; } diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index 1b3bc8386524..75f120da0a84 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -650,8 +650,8 @@ lnet_parse_route(char *str, int *im_a_router) INIT_LIST_HEAD(&nets); /* save a copy of the string for error messages */ - strncpy(cmd, str, sizeof(cmd) - 1); - cmd[sizeof(cmd) - 1] = 0; + strncpy(cmd, str, sizeof(cmd)); + cmd[sizeof(cmd) - 1] = '\0'; sep = str; for (;;) { @@ -972,11 +972,13 @@ lnet_splitnets(char *source, struct list_head *nets) return 0; offset += (int)(sep - tb->ltb_text); - tb2 = lnet_new_text_buf(strlen(sep)); + len = strlen(sep); + tb2 = lnet_new_text_buf(len); if (tb2 == NULL) return -ENOMEM; - strcpy(tb2->ltb_text, sep); + strncpy(tb2->ltb_text, sep, len); + tb2->ltb_text[len] = '\0'; list_add_tail(&tb2->ltb_list, nets); tb = tb2; @@ -1021,8 +1023,8 @@ lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) tb = list_entry(raw_entries.next, struct lnet_text_buf_t, ltb_list); - strncpy(source, tb->ltb_text, sizeof(source)-1); - source[sizeof(source)-1] = 0; + strncpy(source, tb->ltb_text, sizeof(source)); + source[sizeof(source)-1] = '\0'; /* replace ltb_text with the network(s) add on match */ rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 64a0335934f3..1066c70434b1 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -612,8 +612,8 @@ lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; msrq->mksn_sid = console_session.ses_id; msrq->mksn_force = console_session.ses_force; - strncpy(msrq->mksn_name, console_session.ses_name, - strlen(console_session.ses_name)); + strlcpy(msrq->mksn_name, console_session.ses_name, + sizeof(msrq->mksn_name)); break; case LST_TRANS_SESEND: diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c index d315dd44ae3b..ed1bc6ac79dd 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -1739,7 +1739,8 @@ lstcon_session_new(char *name, int key, unsigned feats, console_session.ses_feats_updated = 0; console_session.ses_timeout = (timeout <= 0) ? LST_CONSOLE_TIMEOUT : timeout; - strcpy(console_session.ses_name, name); + strlcpy(console_session.ses_name, name, + sizeof(console_session.ses_name)); rc = lstcon_batch_add(LST_DEFAULT_BATCH); if (rc != 0) @@ -1959,7 +1960,8 @@ lstcon_acceptor_handle(srpc_server_rpc_t *rpc) if (grp->grp_userland == 0) grp->grp_userland = 1; - strcpy(jrep->join_session, console_session.ses_name); + strlcpy(jrep->join_session, console_session.ses_name, + sizeof(jrep->join_session)); jrep->join_timeout = console_session.ses_timeout; jrep->join_status = 0; diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 5e1ac129a681..7c6933ffc9c1 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -68,6 +68,7 @@ everything as string options */ #define LMD_MAGIC 0xbdacbd03 +#define LMD_PARAMS_MAXLEN 4096 /* gleaned from the mount command - no persistent info here */ struct lustre_mount_data { diff --git a/drivers/staging/lustre/lustre/libcfs/debug.c b/drivers/staging/lustre/lustre/libcfs/debug.c index 1d1c67164418..170775bc7bc0 100644 --- a/drivers/staging/lustre/lustre/libcfs/debug.c +++ b/drivers/staging/lustre/lustre/libcfs/debug.c @@ -512,9 +512,9 @@ int libcfs_debug_init(unsigned long bufsize) } if (libcfs_debug_file_path != NULL) { - strncpy(libcfs_debug_file_path_arr, - libcfs_debug_file_path, PATH_MAX-1); - libcfs_debug_file_path_arr[PATH_MAX - 1] = '\0'; + strlcpy(libcfs_debug_file_path_arr, + libcfs_debug_file_path, + sizeof(libcfs_debug_file_path_arr)); } /* If libcfs_debug_mb is set to an invalid value or uninitialized diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c index 030874428952..55fc2190a5bb 100644 --- a/drivers/staging/lustre/lustre/libcfs/hash.c +++ b/drivers/staging/lustre/lustre/libcfs/hash.c @@ -1062,8 +1062,7 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, if (hs == NULL) return NULL; - strncpy(hs->hs_name, name, len); - hs->hs_name[len - 1] = '\0'; + strlcpy(hs->hs_name, name, len); hs->hs_flags = flags; atomic_set(&hs->hs_refcount, 1); diff --git a/drivers/staging/lustre/lustre/libcfs/workitem.c b/drivers/staging/lustre/lustre/libcfs/workitem.c index e1143a566ac4..f6cc434af756 100644 --- a/drivers/staging/lustre/lustre/libcfs/workitem.c +++ b/drivers/staging/lustre/lustre/libcfs/workitem.c @@ -360,8 +360,8 @@ cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, if (sched == NULL) return -ENOMEM; - strncpy(sched->ws_name, name, CFS_WS_NAME_LEN); - sched->ws_name[CFS_WS_NAME_LEN - 1] = '\0'; + strlcpy(sched->ws_name, name, CFS_WS_NAME_LEN); + sched->ws_cptab = cptab; sched->ws_cpt = cpt; diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 5c9502b5b358..951259a98323 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -641,7 +641,7 @@ static int ll_send_mgc_param(struct obd_export *mgc, char *string) if (!msp) return -ENOMEM; - strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN); + strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param)); rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO, sizeof(struct mgs_send_param), msp, NULL); if (rc) diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c index b03827ef6514..b43ce6cd64c2 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pool.c +++ b/drivers/staging/lustre/lustre/lov/lov_pool.c @@ -412,8 +412,7 @@ int lov_pool_new(struct obd_device *obd, char *poolname) if (!new_pool) return -ENOMEM; - strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME); - new_pool->pool_name[LOV_MAXPOOLNAME] = '\0'; + strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); new_pool->pool_lobd = obd; /* ref count init to 1 because when created a pool is always used * up to deletion diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c index 48003d5325e3..7617c57d16e0 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c @@ -892,7 +892,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) } lmd->lmd_magic = LMD_MAGIC; - lmd->lmd_params = kzalloc(4096, GFP_NOFS); + lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS); if (!lmd->lmd_params) return -ENOMEM; lmd->lmd_params[0] = '\0'; @@ -978,7 +978,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) goto invalid; clear++; } else if (strncmp(s1, "param=", 6) == 0) { - int length; + size_t length, params_length; char *tail = strchr(s1 + 6, ','); if (tail == NULL) @@ -986,8 +986,12 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) else length = tail - s1; length -= 6; + params_length = strlen(lmd->lmd_params); + if (params_length + length + 1 >= LMD_PARAMS_MAXLEN) + return -E2BIG; strncat(lmd->lmd_params, s1 + 6, length); - strcat(lmd->lmd_params, " "); + lmd->lmd_params[params_length + length] = '\0'; + strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN); clear++; } else if (strncmp(s1, "osd=", 4) == 0) { rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c index ce036a1ac466..ac87aa12bd7e 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c @@ -422,6 +422,7 @@ static int ptlrpcd(void *arg) complete(&pc->pc_starting); /* + * This mainloop strongly resembles ptlrpc_set_wait() except that our * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when * there are requests in the set. New requests come in on the set's diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c index 7ff948fe1424..7a206705865b 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c @@ -83,8 +83,7 @@ int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr) return 0; } - strncpy(buf, str, sizeof(buf)); - buf[sizeof(buf) - 1] = '\0'; + strlcpy(buf, str, sizeof(buf)); bulk = strchr(buf, '-'); if (bulk) diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 5a9c784bec04..a88e37444be0 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -793,7 +793,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index 9071afbd7bf4..b776b74d3d14 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -85,7 +85,7 @@ static int __hfi1_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c index d29b4daf61f8..f69ec728e0de 100644 --- a/drivers/staging/rdma/ipath/ipath_user_pages.c +++ b/drivers/staging/rdma/ipath/ipath_user_pages.c @@ -72,7 +72,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(current, current->mm, start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index c2d2c17550a7..951f22265105 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -47,6 +47,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index 06ef26872462..52aed7cfeb24 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -387,7 +387,7 @@ static ssize_t synth_store(struct kobject *kobj, struct kobj_attribute *attr, len = strlen(buf); if (len < 2 || len > 9) return -EINVAL; - strncpy(new_synth_name, buf, len); + memcpy(new_synth_name, buf, len); if (new_synth_name[len - 1] == '\n') len--; new_synth_name[len] = '\0'; @@ -514,7 +514,7 @@ static ssize_t punc_store(struct kobject *kobj, struct kobj_attribute *attr, return -EINVAL; } - strncpy(punc_buf, buf, x); + memcpy(punc_buf, buf, x); while (x && punc_buf[x - 1] == '\n') x--; diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 8fd8f3a2d1bf..58b6403458b7 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -972,8 +972,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) return; } - MACvIntDisable(priv->PortOffset); - spin_lock_irqsave(&priv->lock, flags); /* Read low level stats */ @@ -1062,8 +1060,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) } spin_unlock_irqrestore(&priv->lock, flags); - - MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static void vnt_interrupt_work(struct work_struct *work) @@ -1073,14 +1069,17 @@ static void vnt_interrupt_work(struct work_struct *work) if (priv->vif) vnt_interrupt_process(priv); + + MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static irqreturn_t vnt_interrupt(int irq, void *arg) { struct vnt_private *priv = arg; - if (priv->vif) - schedule_work(&priv->interrupt_work); + schedule_work(&priv->interrupt_work); + + MACvIntDisable(priv->PortOffset); return IRQ_HANDLED; } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 58fe27705b96..cbb4414edd71 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4232,9 +4232,9 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) struct se_cmd *se_cmd = &cmd->se_cmd; if (se_cmd->se_tfo != NULL) { - spin_lock(&se_cmd->t_state_lock); + spin_lock_irq(&se_cmd->t_state_lock); se_cmd->transport_state |= CMD_T_FABRIC_STOP; - spin_unlock(&se_cmd->t_state_lock); + spin_unlock_irq(&se_cmd->t_state_lock); } } spin_unlock_bh(&conn->cmd_lock); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 9413e1a949e5..5af4d6a03d6e 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -108,12 +108,17 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) buf[7] = 0x2; /* CmdQue=1 */ - memcpy(&buf[8], "LIO-ORG ", 8); - memset(&buf[16], 0x20, 16); + /* + * ASCII data fields described as being left-aligned shall have any + * unused bytes at the end of the field (i.e., highest offset) and the + * unused bytes shall be filled with ASCII space characters (20h). + */ + memset(&buf[8], 0x20, 8 + 16 + 4); + memcpy(&buf[8], "LIO-ORG", sizeof("LIO-ORG") - 1); memcpy(&buf[16], dev->t10_wwn.model, - min_t(size_t, strlen(dev->t10_wwn.model), 16)); + strnlen(dev->t10_wwn.model, 16)); memcpy(&buf[32], dev->t10_wwn.revision, - min_t(size_t, strlen(dev->t10_wwn.revision), 4)); + strnlen(dev->t10_wwn.revision, 4)); buf[4] = 31; /* Set additional length to 31 */ return 0; @@ -251,7 +256,9 @@ check_t10_vend_desc: buf[off] = 0x2; /* ASCII */ buf[off+1] = 0x1; /* T10 Vendor ID */ buf[off+2] = 0x0; - memcpy(&buf[off+4], "LIO-ORG", 8); + /* left align Vendor ID and pad with spaces */ + memset(&buf[off+4], 0x20, 8); + memcpy(&buf[off+4], "LIO-ORG", sizeof("LIO-ORG") - 1); /* Extra Byte for NULL Terminator */ id_len++; /* Identifier Length */ diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 5836e5554433..d4c374cc4f74 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -20,6 +20,13 @@ enum int3400_thermal_uuid { INT3400_THERMAL_PASSIVE_1, INT3400_THERMAL_ACTIVE, INT3400_THERMAL_CRITICAL, + INT3400_THERMAL_ADAPTIVE_PERFORMANCE, + INT3400_THERMAL_EMERGENCY_CALL_MODE, + INT3400_THERMAL_PASSIVE_2, + INT3400_THERMAL_POWER_BOSS, + INT3400_THERMAL_VIRTUAL_SENSOR, + INT3400_THERMAL_COOLING_MODE, + INT3400_THERMAL_HARDWARE_DUTY_CYCLING, INT3400_THERMAL_MAXIMUM_UUID, }; @@ -27,6 +34,13 @@ static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", + "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D", + "5349962F-71E6-431D-9AE8-0A635B710AEE", + "9E04115A-AE87-4D1C-9500-0F3E340BFE75", + "F5A35014-C209-46A4-993A-EB56DE7530A1", + "6ED722A7-9240-48A5-B479-31EEF723D7CF", + "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531", + "BE84BABF-C4D4-403D-B495-3128FD44dAC1", }; struct int3400_thermal_priv { @@ -271,10 +285,9 @@ static int int3400_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) { - int3400_thermal_ops.get_mode = int3400_thermal_get_mode; - int3400_thermal_ops.set_mode = int3400_thermal_set_mode; - } + int3400_thermal_ops.get_mode = int3400_thermal_get_mode; + int3400_thermal_ops.set_mode = int3400_thermal_set_mode; + priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0, priv, &int3400_thermal_ops, &int3400_thermal_params, 0, 0); diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index ccc0ad02d066..7f374ab5b176 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -363,7 +363,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, proc_priv->soc_dts = intel_soc_dts_iosf_init( INTEL_SOC_DTS_INTERRUPT_MSI, 2, 0); - if (proc_priv->soc_dts && pdev->irq) { + if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) { ret = pci_enable_msi(pdev); if (!ret) { ret = request_threaded_irq(pdev->irq, NULL, diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h index c798fdb2ae43..f97f76691bd0 100644 --- a/drivers/thermal/thermal_hwmon.h +++ b/drivers/thermal/thermal_hwmon.h @@ -34,13 +34,13 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz); #else -static int +static inline int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { return 0; } -static void +static inline void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { } diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 4537abf6425d..52e49524645f 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -473,4 +473,27 @@ config MIPS_EJTAG_FDC_KGDB_CHAN help FDC channel number to use for KGDB. +config LDISC_AUTOLOAD + bool "Automatically load TTY Line Disciplines" + default y + help + Historically the kernel has always automatically loaded any + line discipline that is in a kernel module when a user asks + for it to be loaded with the TIOCSETD ioctl, or through other + means. This is not always the best thing to do on systems + where you know you will not be using some of the more + "ancient" line disciplines, so prevent the kernel from doing + this unless the request is coming from a process with the + CAP_SYS_MODULE permissions. + + Say 'Y' here if you trust your userspace users to do the right + thing, or if you have only provided the line disciplines that + you know you will be using, or if you wish to continue to use + the traditional method of on-demand loading of these modules + by any user. + + This functionality can be changed at runtime with the + dev.tty.ldisc_autoload sysctl, this configuration option will + only set the default value of this functionality. + endif # TTY diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 6d1e2f746ab4..8d6253903f24 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -598,6 +598,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, /* too large for caller's buffer */ ret = -EOVERFLOW; } else { + __set_current_state(TASK_RUNNING); if (copy_to_user(buf, rbuf->buf, rbuf->count)) ret = -EFAULT; else diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 746c76b358a0..b032add92722 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2326,6 +2326,111 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = pci_default_setup, .exit = pci_plx9050_exit, }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, + }, /* * SBS Technologies, Inc., PMC-OCTALPRO 232 */ @@ -5176,10 +5281,10 @@ static struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -5188,10 +5293,10 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -5200,10 +5305,10 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -5212,13 +5317,13 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7951 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -5227,16 +5332,16 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7954 }, @@ -5245,13 +5350,13 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7954 }, + pbn_pericom_PI7C9X7952 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, @@ -5260,19 +5365,19 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_pericom_PI7C9X7958 }, { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_pericom_PI7C9X7958 }, + pbn_pericom_PI7C9X7954 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index e0277cf0bf58..fc46c8cf5fcd 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -169,6 +169,8 @@ struct atmel_uart_port { unsigned int pending_status; spinlock_t lock_suspended; + bool hd_start_rx; /* can start RX during half-duplex operation */ + int (*prepare_rx)(struct uart_port *port); int (*prepare_tx)(struct uart_port *port); void (*schedule_rx)(struct uart_port *port); @@ -237,6 +239,12 @@ static inline void atmel_uart_write_char(struct uart_port *port, u8 value) #endif +static inline int atmel_uart_is_half_duplex(struct uart_port *port) +{ + return (port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX); +} + #ifdef CONFIG_SERIAL_ATMEL_PDC static bool atmel_use_pdc_rx(struct uart_port *port) { @@ -481,9 +489,9 @@ static void atmel_stop_tx(struct uart_port *port) /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); - if ((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) + if (atmel_uart_is_half_duplex(port)) atmel_start_rx(port); + } /* @@ -500,8 +508,7 @@ static void atmel_start_tx(struct uart_port *port) return; if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port)) - if ((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) + if (atmel_uart_is_half_duplex(port)) atmel_stop_rx(port); if (atmel_use_pdc_tx(port)) @@ -810,10 +817,14 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) tasklet_schedule(&atmel_port->tasklet); - else if ((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); + else if (atmel_uart_is_half_duplex(port)) { + /* + * DMA done, re-enable TXEMPTY and signal that we can stop + * TX and start RX for RS485 + */ + atmel_port->hd_start_rx = true; + atmel_uart_writel(port, ATMEL_US_IER, + atmel_port->tx_done_mask); } spin_unlock_irqrestore(&port->lock, flags); @@ -1167,6 +1178,10 @@ static int atmel_prepare_rx_dma(struct uart_port *port) sg_dma_len(&atmel_port->sg_rx)/2, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); + if (!desc) { + dev_err(port->dev, "Preparing DMA cyclic failed\n"); + goto chan_err; + } desc->callback = atmel_complete_rx_dma; desc->callback_param = port; atmel_port->desc_rx = desc; @@ -1249,9 +1264,20 @@ atmel_handle_transmit(struct uart_port *port, unsigned int pending) struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); if (pending & atmel_port->tx_done_mask) { - /* Either PDC or interrupt transmission */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); + + /* Start RX if flag was set and FIFO is empty */ + if (atmel_port->hd_start_rx) { + if (!(atmel_uart_readl(port, ATMEL_US_CSR) + & ATMEL_US_TXEMPTY)) + dev_warn(port->dev, "Should start RX, but TX fifo is not empty\n"); + + atmel_port->hd_start_rx = false; + atmel_start_rx(port); + return; + } + tasklet_schedule(&atmel_port->tasklet); } } @@ -1384,8 +1410,7 @@ static void atmel_tx_pdc(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); } else { - if ((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { + if (atmel_uart_is_half_duplex(port)) { /* DMA done, stop TX, start RX for RS485 */ atmel_start_rx(port); } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 01e2274b23f2..1544a7cc76ff 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1267,6 +1267,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, else cr1 &= ~UARTCR1_PT; } + } else { + cr1 &= ~UARTCR1_PE; } /* ask the core to calculate the divisor */ @@ -1402,10 +1404,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, else ctrl &= ~UARTCTRL_PT; } + } else { + ctrl &= ~UARTCTRL_PE; } /* ask the core to calculate the divisor */ - baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 16); + baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 4); spin_lock_irqsave(&sport->port.lock, flags); diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index f2b0d8cee8ef..0314e78e31ff 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -148,8 +148,10 @@ static int configure_kgdboc(void) char *cptr = config; struct console *cons; - if (!strlen(config) || isspace(config[0])) + if (!strlen(config) || isspace(config[0])) { + err = 0; goto noconfig; + } kgdboc_io_ops.is_console = 0; kgdb_tty_driver = NULL; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 3f98165b479c..ab516a90c89f 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1306,6 +1306,8 @@ static int max310x_spi_probe(struct spi_device *spi) if (spi->dev.of_node) { const struct of_device_id *of_id = of_match_device(max310x_dt_ids, &spi->dev); + if (!of_id) + return -ENODEV; devtype = (struct max310x_devtype *)of_id->data; } else { diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 4d532a085db9..12bac2cbae4b 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1329,11 +1329,14 @@ static void s3c24xx_serial_set_termios(struct uart_port *port, wr_regl(port, S3C2410_ULCON, ulcon); wr_regl(port, S3C2410_UBRDIV, quot); + port->status &= ~UPSTAT_AUTOCTS; + umcon = rd_regl(port, S3C2410_UMCON); if (termios->c_cflag & CRTSCTS) { umcon |= S3C2410_UMCOM_AFC; /* Disable RTS when RX FIFO contains 63 bytes */ umcon &= ~S3C2412_UMCON_AFC_8; + port->status = UPSTAT_AUTOCTS; } else { umcon &= ~S3C2410_UMCOM_AFC; } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index b63920481b1d..669134e27ed9 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -746,19 +746,9 @@ static void sci_transmit_chars(struct uart_port *port) if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - if (uart_circ_empty(xmit)) { + if (uart_circ_empty(xmit)) sci_stop_tx(port); - } else { - ctrl = serial_port_in(port, SCSCR); - if (port->type != PORT_SCI) { - serial_port_in(port, SCxSR); /* Dummy read */ - sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port)); - } - - ctrl |= SCSCR_TIE; - serial_port_out(port, SCSCR, ctrl); - } } /* On SH3, SCIF may read end-of-break as a space->mark char */ diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 1e302caaa450..c894eca57e73 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -36,7 +36,7 @@ #define SPRD_FIFO_SIZE 128 #define SPRD_DEF_RATE 26000000 #define SPRD_BAUD_IO_LIMIT 3000000 -#define SPRD_TIMEOUT 256 +#define SPRD_TIMEOUT 256000 /* the offset of serial registers and BITs for them */ /* data registers */ @@ -63,6 +63,7 @@ /* interrupt clear register */ #define SPRD_ICLR 0x0014 +#define SPRD_ICLR_TIMEOUT BIT(13) /* line control register */ #define SPRD_LCR 0x0018 @@ -298,7 +299,8 @@ static irqreturn_t sprd_handle_irq(int irq, void *dev_id) return IRQ_NONE; } - serial_out(port, SPRD_ICLR, ~0); + if (ims & SPRD_IMSR_TIMEOUT) + serial_out(port, SPRD_ICLR, SPRD_ICLR_TIMEOUT); if (ims & (SPRD_IMSR_RX_FIFO_FULL | SPRD_IMSR_BREAK_DETECT | SPRD_IMSR_TIMEOUT)) diff --git a/drivers/tty/serial/suncore.c b/drivers/tty/serial/suncore.c index 127472bd6a7c..209f314745ab 100644 --- a/drivers/tty/serial/suncore.c +++ b/drivers/tty/serial/suncore.c @@ -111,6 +111,7 @@ void sunserial_console_termios(struct console *con, struct device_node *uart_dp) mode = of_get_property(dp, mode_prop, NULL); if (!mode) mode = "9600,8,n,1,-"; + of_node_put(dp); } cflag = CREAD | HUPCL | CLOCAL; diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 4f2f4aca8d2e..06efcef1b495 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1145,7 +1145,7 @@ static void cdns_uart_console_write(struct console *co, const char *s, * * Return: 0 on success, negative errno otherwise. */ -static int __init cdns_uart_console_setup(struct console *co, char *options) +static int cdns_uart_console_setup(struct console *co, char *options) { struct uart_port *port = &cdns_uart_port[co->index]; int baud = 9600; diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 355e9cad680d..4706df20191b 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -25,7 +25,7 @@ * Byte threshold to limit memory consumption for flip buffers. * The actual memory limit is > 2x this amount. */ -#define TTYB_DEFAULT_MEM_LIMIT 65536 +#define TTYB_DEFAULT_MEM_LIMIT (640 * 1024UL) /* * We default to dicing tty buffer allocations to this many characters diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 861c44ca71ed..5b64bb5280fb 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -513,6 +513,8 @@ void proc_clear_tty(struct task_struct *p) tty_kref_put(tty); } +extern void tty_sysctl_init(void); + /** * proc_set_tty - set the controlling terminal * @@ -2303,7 +2305,8 @@ static int tiocsti(struct tty_struct *tty, char __user *p) return -EFAULT; tty_audit_tiocsti(tty, ch); ld = tty_ldisc_ref_wait(tty); - ld->ops->receive_buf(tty, &ch, &mbz, 1); + if (ld->ops->receive_buf) + ld->ops->receive_buf(tty, &ch, &mbz, 1); tty_ldisc_deref(ld); return 0; } @@ -3694,6 +3697,7 @@ void console_sysfs_notify(void) */ int __init tty_init(void) { + tty_sysctl_init(); cdev_init(&tty_cdev, &tty_fops); if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index eb1b44c87b6c..10f2736c53d7 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -148,6 +148,13 @@ static void put_ldops(struct tty_ldisc_ops *ldops) * takes tty_ldiscs_lock to guard against ldisc races */ +#if defined(CONFIG_LDISC_AUTOLOAD) + #define INITIAL_AUTOLOAD_STATE 1 +#else + #define INITIAL_AUTOLOAD_STATE 0 +#endif +static int tty_ldisc_autoload = INITIAL_AUTOLOAD_STATE; + static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; @@ -162,6 +169,8 @@ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) */ ldops = get_ldops(disc); if (IS_ERR(ldops)) { + if (!capable(CAP_SYS_MODULE) && !tty_ldisc_autoload) + return ERR_PTR(-EPERM); request_module("tty-ldisc-%d", disc); ldops = get_ldops(disc); if (IS_ERR(ldops)) @@ -830,3 +839,41 @@ void tty_ldisc_begin(void) /* Setup the default TTY line discipline. */ (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); } + +static int zero; +static int one = 1; +static struct ctl_table tty_table[] = { + { + .procname = "ldisc_autoload", + .data = &tty_ldisc_autoload, + .maxlen = sizeof(tty_ldisc_autoload), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static struct ctl_table tty_dir_table[] = { + { + .procname = "tty", + .mode = 0555, + .child = tty_table, + }, + { } +}; + +static struct ctl_table tty_root_table[] = { + { + .procname = "dev", + .mode = 0555, + .child = tty_dir_table, + }, + { } +}; + +void tty_sysctl_init(void) +{ + register_sysctl_table(tty_root_table); +} diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index ad7eba5ca380..34234c233851 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -307,6 +307,16 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) if (!locked) ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem); list_del(&waiter.list); + + /* + * In case of timeout, wake up every reader who gave the right of way + * to writer. Prevent separation readers into two groups: + * one that helds semaphore and another that sleeps. + * (in case of no contention with a writer) + */ + if (!locked && list_empty(&sem->write_wait)) + __ldsem_wake_readers(sem); + raw_spin_unlock_irq(&sem->wait_lock); __set_task_state(tsk, TASK_RUNNING); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ff3286fc22d8..6779f733bb83 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -958,6 +958,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (CON_IS_VISIBLE(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); + notify_update(vc); return err; } diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 57ee43512992..dee22d8effda 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -913,8 +913,15 @@ static int ci_hdrc_probe(struct platform_device *pdev) } else if (ci->platdata->usb_phy) { ci->usb_phy = ci->platdata->usb_phy; } else { + ci->usb_phy = devm_usb_get_phy_by_phandle(dev->parent, "phys", + 0); ci->phy = devm_phy_get(dev->parent, "usb-phy"); - ci->usb_phy = devm_usb_get_phy(dev->parent, USB_PHY_TYPE_USB2); + + /* Fallback to grabbing any registered USB2 PHY */ + if (IS_ERR(ci->usb_phy) && + PTR_ERR(ci->usb_phy) != -EPROBE_DEFER) + ci->usb_phy = devm_usb_get_phy(dev->parent, + USB_PHY_TYPE_USB2); /* if both generic PHY and USB PHY layers aren't enabled */ if (PTR_ERR(ci->phy) == -ENOSYS && diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0a8e5ac891d4..736de1021d8b 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -507,6 +507,13 @@ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty) if (retval) goto error_init_termios; + /* + * Suppress initial echoing for some devices which might send data + * immediately after acm driver has been installed. + */ + if (acm->quirks & DISABLE_ECHO) + tty->termios.c_lflag &= ~ECHO; + tty->driver_data = acm; return 0; @@ -1677,6 +1684,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */ + .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ + }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, @@ -1875,6 +1885,13 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ + .driver_info = SEND_ZERO_PACKET, + }, + { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ + .driver_info = SEND_ZERO_PACKET, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index b30ac5fcde68..1ad9ff9f493d 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -134,3 +134,4 @@ struct acm { #define QUIRK_CONTROL_LINE_STATE BIT(6) #define CLEAR_HALT_CONDITIONS BIT(7) #define SEND_ZERO_PACKET BIT(8) +#define DISABLE_ECHO BIT(9) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1e5f55118d42..df20d5634d51 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1092,6 +1092,16 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) USB_PORT_FEAT_ENABLE); } + /* + * Add debounce if USB3 link is in polling/link training state. + * Link will automatically transition to Enabled state after + * link training completes. + */ + if (hub_is_superspeed(hdev) && + ((portstatus & USB_PORT_STAT_LINK_STATE) == + USB_SS_PORT_LS_POLLING)) + need_debounce_delay = true; + /* Clear status-change flags; we'll debounce later */ if (portchange & USB_PORT_STAT_C_CONNECTION) { need_debounce_delay = true; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f0cd76c667a1..a717e5541975 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -251,7 +251,8 @@ static const struct usb_device_id usb_quirk_list[] = { USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, /* Corsair K70 RGB */ - { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe */ { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 3b2863a697a4..a29ba1d423dc 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1823,6 +1823,8 @@ unknown: break; case USB_RECIP_ENDPOINT: + if (!cdev->config) + break; endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f); list_for_each_entry(f, &cdev->config->functions, list) { if (test_bit(endp, f->endpoints)) diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 67b243989938..d7d095781be1 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -849,7 +849,7 @@ static struct usb_function *source_sink_alloc_func( ss = kzalloc(sizeof(*ss), GFP_KERNEL); if (!ss) - return NULL; + return ERR_PTR(-ENOMEM); ss_opts = container_of(fi, struct f_ss_opts, func_inst); diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 2ec7171b3f04..4dba794a6ad5 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -690,9 +690,8 @@ static int rndis_reset_response(struct rndis_params *params, { rndis_reset_cmplt_type *resp; rndis_resp_t *r; - - u32 length; u8 *xbuf; + u32 length; /* drain the response queue */ while ((xbuf = rndis_get_next_response(params, &length))) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 4ea44f7122ee..d73618475664 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -361,10 +361,15 @@ __acquires(&port->port_lock) */ { struct list_head *pool = &port->write_pool; - struct usb_ep *in = port->port_usb->in; + struct usb_ep *in; int status = 0; bool do_tty_wake = false; + if (!port->port_usb) + return status; + + in = port->port_usb->in; + while (!port->write_busy && !list_empty(pool)) { struct usb_request *req; int len; diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 18f5ebd447b8..3b6e34fc032b 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2100,7 +2100,7 @@ static irqreturn_t net2272_irq(int irq, void *_dev) #if defined(PLX_PCI_RDK2) /* see if PCI int for us by checking irqstat */ intcsr = readl(dev->rdk2.fpga_base_addr + RDK2_IRQSTAT); - if (!intcsr & (1 << NET2272_PCI_IRQ)) { + if (!(intcsr & (1 << NET2272_PCI_IRQ))) { spin_unlock(&dev->lock); return IRQ_NONE; } diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 9b7d39484ed3..d1ed92acafa3 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2037,6 +2037,7 @@ static inline int machine_without_vbus_sense(void) { return machine_is_omap_innovator() || machine_is_omap_osk() + || machine_is_omap_palmte() || machine_is_sx1() /* No known omap7xx boards with vbus sense */ || cpu_is_omap7xx(); @@ -2045,7 +2046,7 @@ static inline int machine_without_vbus_sense(void) static int omap_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { - int status = -ENODEV; + int status; struct omap_ep *ep; unsigned long flags; @@ -2083,6 +2084,7 @@ static int omap_udc_start(struct usb_gadget *g, goto done; } } else { + status = 0; if (can_pullup(udc)) pullup_enable(udc); else @@ -2612,9 +2614,22 @@ omap_ep_setup(char *name, u8 addr, u8 type, static void omap_udc_release(struct device *dev) { - complete(udc->done); + pullup_disable(udc); + if (!IS_ERR_OR_NULL(udc->transceiver)) { + usb_put_phy(udc->transceiver); + udc->transceiver = NULL; + } + omap_writew(0, UDC_SYSCON1); + remove_proc_file(); + if (udc->dc_clk) { + if (udc->clk_requested) + omap_udc_enable_clock(0); + clk_put(udc->hhc_clk); + clk_put(udc->dc_clk); + } + if (udc->done) + complete(udc->done); kfree(udc); - udc = NULL; } static int @@ -2886,8 +2901,8 @@ bad_on_1710: udc->clr_halt = UDC_RESET_EP; /* USB general purpose IRQ: ep0, state changes, dma, etc */ - status = request_irq(pdev->resource[1].start, omap_udc_irq, - 0, driver_name, udc); + status = devm_request_irq(&pdev->dev, pdev->resource[1].start, + omap_udc_irq, 0, driver_name, udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[1].start, status); @@ -2895,20 +2910,20 @@ bad_on_1710: } /* USB "non-iso" IRQ (PIO for all but ep0) */ - status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, - 0, "omap_udc pio", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[2].start, + omap_udc_pio_irq, 0, "omap_udc pio", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[2].start, status); - goto cleanup2; + goto cleanup1; } #ifdef USE_ISO - status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, - 0, "omap_udc iso", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[3].start, + omap_udc_iso_irq, 0, "omap_udc iso", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[3].start, status); - goto cleanup3; + goto cleanup1; } #endif if (cpu_is_omap16xx() || cpu_is_omap7xx()) { @@ -2919,23 +2934,8 @@ bad_on_1710: } create_proc_file(); - status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, - omap_udc_release); - if (status) - goto cleanup4; - - return 0; - -cleanup4: - remove_proc_file(); - -#ifdef USE_ISO -cleanup3: - free_irq(pdev->resource[2].start, udc); -#endif - -cleanup2: - free_irq(pdev->resource[1].start, udc); + return usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, + omap_udc_release); cleanup1: kfree(udc); @@ -2962,42 +2962,15 @@ static int omap_udc_remove(struct platform_device *pdev) { DECLARE_COMPLETION_ONSTACK(done); - if (!udc) - return -ENODEV; - - usb_del_gadget_udc(&udc->gadget); - if (udc->driver) - return -EBUSY; - udc->done = &done; - pullup_disable(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) { - usb_put_phy(udc->transceiver); - udc->transceiver = NULL; - } - omap_writew(0, UDC_SYSCON1); + usb_del_gadget_udc(&udc->gadget); - remove_proc_file(); - -#ifdef USE_ISO - free_irq(pdev->resource[3].start, udc); -#endif - free_irq(pdev->resource[2].start, udc); - free_irq(pdev->resource[1].start, udc); - - if (udc->dc_clk) { - if (udc->clk_requested) - omap_udc_enable_clock(0); - clk_put(udc->hhc_clk); - clk_put(udc->dc_clk); - } + wait_for_completion(&done); release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); - wait_for_completion(&done); - return 0; } diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index a11c2c8bda53..a217f71b45c6 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1990,6 +1990,8 @@ static int r8a66597_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, static void r8a66597_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep) +__acquires(r8a66597->lock) +__releases(r8a66597->lock) { struct r8a66597 *r8a66597 = hcd_to_r8a66597(hcd); struct r8a66597_pipe *pipe = (struct r8a66597_pipe *)hep->hcpriv; @@ -2002,13 +2004,14 @@ static void r8a66597_endpoint_disable(struct usb_hcd *hcd, return; pipenum = pipe->info.pipenum; + spin_lock_irqsave(&r8a66597->lock, flags); if (pipenum == 0) { kfree(hep->hcpriv); hep->hcpriv = NULL; + spin_unlock_irqrestore(&r8a66597->lock, flags); return; } - spin_lock_irqsave(&r8a66597->lock, flags); pipe_stop(r8a66597, pipe); pipe_irq_disable(r8a66597, pipenum); disable_irq_empty(r8a66597, pipenum); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 73bd25ce4c76..56ac5135f901 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1332,7 +1332,8 @@ int xhci_bus_suspend(struct usb_hcd *hcd) portsc_buf[port_index] = 0; /* Bail out if a USB3 port has a new device in link training */ - if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) { + if ((hcd->speed >= HCD_USB3) && + (t1 & PORT_PLS_MASK) == XDEV_POLLING) { bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index beadfcf077dc..b4572d066d42 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1684,10 +1684,13 @@ static void handle_port_status(struct xhci_hcd *xhci, } } - if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_U0 && - DEV_SUPERSPEED_ANY(portsc)) { + if ((portsc & PORT_PLC) && + DEV_SUPERSPEED_ANY(portsc) && + ((portsc & PORT_PLS_MASK) == XDEV_U0 || + (portsc & PORT_PLS_MASK) == XDEV_U1 || + (portsc & PORT_PLS_MASK) == XDEV_U2)) { xhci_dbg(xhci, "resume SS port %d finished\n", port_id); - /* We've just brought the device into U0 through either the + /* We've just brought the device into U0/1/2 through either the * Resume state after a device remote wakeup, or through the * U3Exit state after a host-initiated resume. If it's a device * initiated remote wake, don't pass up the link state change, diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c index 90b67a4ca221..558f33a75fd9 100644 --- a/drivers/usb/phy/phy-am335x.c +++ b/drivers/usb/phy/phy-am335x.c @@ -56,9 +56,6 @@ static int am335x_phy_probe(struct platform_device *pdev) if (ret) return ret; - ret = usb_add_phy_dev(&am_phy->usb_phy_gen.phy); - if (ret) - return ret; am_phy->usb_phy_gen.phy.init = am335x_init; am_phy->usb_phy_gen.phy.shutdown = am335x_shutdown; @@ -77,7 +74,7 @@ static int am335x_phy_probe(struct platform_device *pdev) device_set_wakeup_enable(dev, false); phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, false); - return 0; + return usb_add_phy_dev(&am_phy->usb_phy_gen.phy); } static int am335x_phy_remove(struct platform_device *pdev) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 8647d2c2a8c4..c5553028e616 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -641,14 +641,11 @@ static int usbhsg_ep_disable(struct usb_ep *ep) struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); struct usbhs_pipe *pipe; unsigned long flags; - int ret = 0; spin_lock_irqsave(&uep->lock, flags); pipe = usbhsg_uep_to_pipe(uep); - if (!pipe) { - ret = -EINVAL; + if (!pipe) goto out; - } usbhsg_pipe_disable(uep); usbhs_pipe_free(pipe); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 97382301c393..e3ea0fdd3913 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -57,6 +57,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ @@ -75,6 +76,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */ + { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */ { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3e5b189a79b4..af258bb632dd 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -604,6 +604,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, @@ -1020,6 +1022,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, + /* EZPrototypes devices */ + { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 76a10b222ff9..15d220eaf6e6 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -566,7 +566,9 @@ /* * NovaTech product ids (FTDI_VID) */ -#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLX_PLUS_PID 0x7c91 /* OrionLX+ Substation Automation Platform */ +#define FTDI_NT_ORION_IO_PID 0x7c92 /* Orion I/O */ /* * Synapse Wireless product ids (FTDI_VID) @@ -1307,6 +1309,12 @@ #define IONICS_VID 0x1c0c #define IONICS_PLUGCOMPUTER_PID 0x0102 +/* + * EZPrototypes (PID reseller) + */ +#define EZPROTOTYPES_VID 0x1c40 +#define HJELMSLUND_USB485_ISO_PID 0x0477 + /* * Dresden Elektronik Sensor Terminal Board */ diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 4581fa1dec98..286b43c79d38 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -368,8 +368,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, if (!urbtrack) return -ENOMEM; - kref_get(&mos_parport->ref_count); - urbtrack->mos_parport = mos_parport; urbtrack->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urbtrack->urb) { kfree(urbtrack); @@ -390,6 +388,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, usb_sndctrlpipe(usbdev, 0), (unsigned char *)urbtrack->setup, NULL, 0, async_complete, urbtrack); + kref_get(&mos_parport->ref_count); + urbtrack->mos_parport = mos_parport; kref_init(&urbtrack->ref_count); INIT_LIST_HEAD(&urbtrack->urblist_entry); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2b81939fecd7..9f96dd274370 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1066,7 +1066,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000), /* SIMCom SIM5218 */ + .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | NCTRL(3) | RSVD(4) }, /* Quectel products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), @@ -1147,6 +1148,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), @@ -1163,6 +1166,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x1900), /* Telit LN940 (QMI) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -1327,6 +1334,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0414, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0417, 0xff, 0xff, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0602, 0xff) }, /* GosunCn ZTE WeLink ME3630 (MBIM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), @@ -1530,6 +1538,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, @@ -1757,6 +1766,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), .driver_info = RSVD(5) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D), @@ -1932,16 +1942,29 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ - { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x10) }, /* HP lt4132 (Huawei ME906s-158) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x12) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, + { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ + .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ + .driver_info = RSVD(6) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 3da25ad267a2..9706d214c409 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -47,6 +47,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_TB) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, @@ -86,9 +87,14 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 123289085ee2..d84c3b3d477b 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -13,6 +13,7 @@ #define PL2303_VENDOR_ID 0x067b #define PL2303_PRODUCT_ID 0x2303 +#define PL2303_PRODUCT_ID_TB 0x2304 #define PL2303_PRODUCT_ID_RSAQ2 0x04bb #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 @@ -25,6 +26,7 @@ #define PL2303_PRODUCT_ID_MOTOROLA 0x0307 #define PL2303_PRODUCT_ID_ZTEK 0xe1f1 + #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 @@ -123,10 +125,15 @@ /* Hewlett-Packard POS Pole Displays */ #define HP_VENDOR_ID 0x03f0 +#define HP_LM920_PRODUCT_ID 0x026b +#define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 +#define HP_LD220TA_PRODUCT_ID 0x4349 +#define HP_LD960TA_PRODUCT_ID 0x4439 +#define HP_LM940_PRODUCT_ID 0x5039 /* Cressi Edy (diving computer) PC interface */ #define CRESSI_VENDOR_ID 0x04b8 diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 6d6acf2c07c3..511242111403 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -88,7 +88,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ - { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ + { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index e42b5dec85d8..aeb340abee5f 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -227,8 +227,12 @@ static int slave_configure(struct scsi_device *sdev) if (!(us->fflags & US_FL_NEEDS_CAP16)) sdev->try_rc_10_first = 1; - /* assume SPC3 or latter devices support sense size > 18 */ - if (sdev->scsi_level > SCSI_SPC_2) + /* + * assume SPC3 or latter devices support sense size > 18 + * unless US_FL_BAD_SENSE quirk is specified. + */ + if (sdev->scsi_level > SCSI_SPC_2 && + !(us->fflags & US_FL_BAD_SENSE)) us->fflags |= US_FL_SANE_SENSE; /* USB-IDE bridges tend to report SK = 0x04 (Non-recoverable diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 6a7d814b72bf..4019654cc903 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1398,6 +1398,18 @@ UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SANE_SENSE), +/* + * Reported by Icenowy Zheng + * The SMI SM3350 USB-UFS bridge controller will enter a wrong state + * that do not process read/write command if a long sense is requested, + * so force to use 18-byte sense. + */ +UNUSUAL_DEV( 0x090c, 0x3350, 0x0000, 0xffff, + "SMI", + "SM3350 UFS-to-USB-Mass-Storage bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BAD_SENSE ), + /* * Pete Zaitcev , bz#164688. * The device blatantly ignores LUN and returns 1 in GetMaxLUN. diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 533eaf04f12f..40764ecad9ce 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -2,7 +2,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) select VHOST - select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate guest networking with virtio_net. Not to be confused with virtio_net @@ -15,17 +14,24 @@ config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD && m select VHOST - select VHOST_RING default n ---help--- Say M here to enable the vhost_scsi TCM fabric module for use with virtio-scsi guests -config VHOST_RING - tristate +config VHOST_VSOCK + tristate "vhost virtio-vsock driver" + depends on VSOCKETS && EVENTFD + select VIRTIO_VSOCKETS_COMMON + select VHOST + default n ---help--- - This option is selected by any driver which needs to access - the host side of a virtio ring. + This kernel module can be loaded in the host kernel to provide AF_VSOCK + sockets for communicating with guests. The guests must have the + virtio_transport.ko driver loaded to use the virtio-vsock device. + + To compile this driver as a module, choose M here: the module will be called + vhost_vsock. config VHOST tristate diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh new file mode 100644 index 000000000000..6a4490c09d7f --- /dev/null +++ b/drivers/vhost/Kconfig.vringh @@ -0,0 +1,5 @@ +config VHOST_RING + tristate + ---help--- + This option is selected by any driver which needs to access + the host side of a virtio ring. diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index e0441c34db1c..6b012b986b57 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,5 +4,9 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o +obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o +vhost_vsock-y := vsock.o + obj-$(CONFIG_VHOST_RING) += vringh.o + obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 645b2197930e..53cf130922f3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_IOMMU_PLATFORM) }; enum { @@ -287,6 +288,69 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev); +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + if (!vq->private_data) + return; + vhost_poll_stop(poll); +} + +static int vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + struct socket *sock; + + sock = vq->private_data; + if (!sock) + return 0; + + return vhost_poll_start(poll, sock->file); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + + if (r == vq->num && vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + vhost_vq_avail_empty(vq->dev, vq)) + cpu_relax_lowlatency(); + preempt_enable(); + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + } + + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -314,6 +378,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) goto out; + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -331,10 +398,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +501,43 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + int len = peek_head_len(sk); + + if (!len && vq->busyloop_timeout) { + /* Both tx vq and rx socket were polled here */ + mutex_lock_nested(&vq->mutex, 1); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + vhost_vq_avail_empty(&net->dev, vq)) + cpu_relax_lowlatency(); + + preempt_enable(); + + if (!vhost_vq_avail_empty(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + else if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); + vhost_poll_queue(&vq->poll); + } + + mutex_unlock(&vq->mutex); + + len = peek_head_len(sk); + } + + return len; +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue @@ -540,11 +643,16 @@ static void handle_rx(struct vhost_net *net) struct iov_iter fixup; __virtio16 num_buffers; - mutex_lock(&vq->mutex); + mutex_lock_nested(&vq->mutex, 0); sock = vq->private_data; if (!sock) goto out; + + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -553,7 +661,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -561,7 +669,7 @@ static void handle_rx(struct vhost_net *net) likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) - break; + goto out; /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); @@ -580,7 +688,7 @@ static void handle_rx(struct vhost_net *net) } /* Nothing new? Wait for eventfd to tell us * they refilled. */ - break; + goto out; } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); @@ -608,7 +716,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr " "at addr %p\n", vq->iov->iov_base); - break; + goto out; } } else { /* Header came from socket; we'll need to patch @@ -624,7 +732,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); - break; + goto out; } vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); @@ -633,9 +741,10 @@ static void handle_rx(struct vhost_net *net) total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); - break; + goto out; } } + vhost_net_enable_vq(net, vq); out: mutex_unlock(&vq->mutex); } @@ -714,32 +823,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) return 0; } -static void vhost_net_disable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - if (!vq->private_data) - return; - vhost_poll_stop(poll); -} - -static int vhost_net_enable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - struct socket *sock; - - sock = vq->private_data; - if (!sock) - return 0; - - return vhost_poll_start(poll, sock->file); -} - static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { @@ -917,7 +1000,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; - r = vhost_init_used(vq); + r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); @@ -969,21 +1052,21 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); @@ -1014,10 +1097,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&n->dev)) { - mutex_unlock(&n->dev.mutex); - return -EFAULT; + !vhost_log_access_ok(&n->dev)) + goto out_unlock; + + if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if (vhost_init_device_iotlb(&n->dev, true)) + goto out_unlock; } + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; @@ -1027,6 +1114,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_unlock(&n->dev.mutex); return 0; + +out_unlock: + mutex_unlock(&n->dev.mutex); + return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) @@ -1100,9 +1191,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, } #endif +static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_write_iter(dev, from); +} + +static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, + .read_iter = vhost_net_chr_read_iter, + .write_iter = vhost_net_chr_write_iter, + .poll = vhost_net_chr_poll, .unlocked_ioctl = vhost_net_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vhost_net_compat_ioctl, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8fc62a03637a..009315f006bf 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1277,7 +1277,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->private_data = vs_tpg; - vhost_init_used(vq); + vhost_vq_init_access(vq); mutex_unlock(&vq->mutex); } ret = 0; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index f2882ac98726..388eec4e1a90 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test) oldpriv = vq->private_data; vq->private_data = priv; - r = vhost_init_used(&n->vqs[index]); + r = vhost_vq_init_access(&n->vqs[index]); mutex_unlock(&vq->mutex); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c54d388310f0..53b1b3cfce84 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "vhost.h" @@ -35,6 +36,10 @@ static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. (default: 2048)"); enum { VHOST_MEMORY_F_LOG = 0x1, @@ -43,12 +48,26 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +INTERVAL_TREE_DEFINE(struct vhost_umem_node, + rb, __u64, __subtree_last, + START, LAST, , vhost_umem_interval_tree); + #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } +static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) +{ + vq->user_be = true; +} + +static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) +{ + vq->user_be = false; +} + static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; @@ -63,7 +82,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; - vq->user_be = s.num; + if (s.num == VHOST_VRING_BIG_ENDIAN) + vhost_enable_cross_endian_big(vq); + else + vhost_enable_cross_endian_little(vq); return 0; } @@ -92,7 +114,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } @@ -109,11 +131,29 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, static void vhost_init_is_le(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - vq->is_le = true; + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) + || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ +static void vhost_reset_is_le(struct vhost_virtqueue *vq) +{ + vhost_init_is_le(vq); +} + +struct vhost_flush_struct { + struct vhost_work work; + struct completion wait_event; +}; + +static void vhost_flush_work(struct vhost_work *work) +{ + struct vhost_flush_struct *s; + + s = container_of(work, struct vhost_flush_struct, work); + complete(&s->wait_event); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -138,11 +178,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { - INIT_LIST_HEAD(&work->node); + clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; } EXPORT_SYMBOL_GPL(vhost_work_init); @@ -193,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, - unsigned seq) -{ - int left; - - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq; - spin_unlock_irq(&dev->work_lock); - return left <= 0; -} - void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { - unsigned seq; - int flushing; + struct vhost_flush_struct flush; - spin_lock_irq(&dev->work_lock); - seq = work->queue_seq; - work->flushing++; - spin_unlock_irq(&dev->work_lock); - wait_event(work->done, vhost_work_seq_done(dev, work, seq)); - spin_lock_irq(&dev->work_lock); - flushing = --work->flushing; - spin_unlock_irq(&dev->work_lock); - BUG_ON(flushing < 0); + if (dev->worker) { + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); + + vhost_work_queue(dev, &flush.work); + wait_for_completion(&flush.wait_event); + } } EXPORT_SYMBOL_GPL(vhost_work_flush); @@ -231,20 +255,27 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - unsigned long flags; + if (!dev->worker) + return; - spin_lock_irqsave(&dev->work_lock, flags); - if (list_empty(&work->node)) { - list_add_tail(&work->node, &dev->work_list); - work->queue_seq++; - spin_unlock_irqrestore(&dev->work_lock, flags); + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { + /* We can only add the work to the list after we're + * sure it was not in the list. + */ + smp_mb(); + llist_add(&work->node, &dev->work_list); wake_up_process(dev->worker); - } else { - spin_unlock_irqrestore(&dev->work_lock, flags); } } EXPORT_SYMBOL_GPL(vhost_work_queue); +/* A lockless hint for busy polling code to exit the loop */ +bool vhost_has_work(struct vhost_dev *dev) +{ + return !llist_empty(&dev->work_list); +} +EXPORT_SYMBOL_GPL(vhost_has_work); + void vhost_poll_queue(struct vhost_poll *poll) { vhost_work_queue(poll->dev, &poll->work); @@ -275,16 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->memory = NULL; - vq->is_le = virtio_legacy_is_little_endian(); - vhost_vq_reset_user_be(vq); + vhost_reset_is_le(vq); + vhost_disable_cross_endian(vq); + vq->busyloop_timeout = 0; + vq->umem = NULL; + vq->iotlb = NULL; } static int vhost_worker(void *data) { struct vhost_dev *dev = data; - struct vhost_work *work = NULL; - unsigned uninitialized_var(seq); + struct vhost_work *work, *work_next; + struct llist_node *node; mm_segment_t oldfs = get_fs(); set_fs(USER_DS); @@ -294,35 +327,25 @@ static int vhost_worker(void *data) /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&dev->work_lock); - if (work) { - work->done_seq = seq; - if (work->flushing) - wake_up_all(&work->done); - } - if (kthread_should_stop()) { - spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); break; } - if (!list_empty(&dev->work_list)) { - work = list_first_entry(&dev->work_list, - struct vhost_work, node); - list_del_init(&work->node); - seq = work->queue_seq; - } else - work = NULL; - spin_unlock_irq(&dev->work_lock); - if (work) { + node = llist_del_all(&dev->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); work->fn(work); if (need_resched()) schedule(); - } else - schedule(); - + } } unuse_mm(dev->mm); set_fs(oldfs); @@ -381,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev, mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->log_file = NULL; - dev->memory = NULL; + dev->umem = NULL; + dev->iotlb = NULL; dev->mm = NULL; - spin_lock_init(&dev->work_lock); - INIT_LIST_HEAD(&dev->work_list); dev->worker = NULL; + init_llist_head(&dev->work_list); + init_waitqueue_head(&dev->wait); + INIT_LIST_HEAD(&dev->read_list); + INIT_LIST_HEAD(&dev->pending_list); + spin_lock_init(&dev->iotlb_lock); + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -486,27 +514,36 @@ err_mm: } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); -struct vhost_memory *vhost_dev_reset_owner_prepare(void) +static void *vhost_kvzalloc(unsigned long size) { - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + + if (!n) + n = vzalloc(size); + return n; +} + +struct vhost_umem *vhost_dev_reset_owner_prepare(void) +{ + return vhost_kvzalloc(sizeof(struct vhost_umem)); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) { int i; vhost_dev_cleanup(dev, true); /* Restore memory to default empty mapping. */ - memory->nregions = 0; - dev->memory = memory; + INIT_LIST_HEAD(&umem->umem_list); + dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) - dev->vqs[i]->memory = memory; + dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); @@ -523,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_stop); +static void vhost_umem_free(struct vhost_umem *umem, + struct vhost_umem_node *node) +{ + vhost_umem_interval_tree_remove(node, &umem->umem_tree); + list_del(&node->link); + kfree(node); + umem->numem--; +} + +static void vhost_umem_clean(struct vhost_umem *umem) +{ + struct vhost_umem_node *node, *tmp; + + if (!umem) + return; + + list_for_each_entry_safe(node, tmp, &umem->umem_list, link) + vhost_umem_free(umem, node); + + kvfree(umem); +} + +static void vhost_clear_msg(struct vhost_dev *dev) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&dev->iotlb_lock); + + list_for_each_entry_safe(node, n, &dev->read_list, node) { + list_del(&node->node); + kfree(node); + } + + list_for_each_entry_safe(node, n, &dev->pending_list, node) { + list_del(&node->node); + kfree(node); + } + + spin_unlock(&dev->iotlb_lock); +} + /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) { @@ -549,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kvfree(dev->memory); - dev->memory = NULL; - WARN_ON(!list_empty(&dev->work_list)); + vhost_umem_clean(dev->umem); + dev->umem = NULL; + vhost_umem_clean(dev->iotlb); + dev->iotlb = NULL; + vhost_clear_msg(dev); + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); + WARN_ON(!llist_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; @@ -575,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +static bool vhost_overflow(u64 uaddr, u64 size) +{ + /* Make sure 64 bit math will not overflow. */ + return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; +} + /* Caller should have vq mutex and device mutex. */ -static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, +static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, int log_all) { - int i; + struct vhost_umem_node *node; - if (!mem) + if (!umem) return 0; - for (i = 0; i < mem->nregions; ++i) { - struct vhost_memory_region *m = mem->regions + i; - unsigned long a = m->userspace_addr; - if (m->memory_size > ULONG_MAX) + list_for_each_entry(node, &umem->umem_list, link) { + unsigned long a = node->userspace_addr; + + if (vhost_overflow(node->userspace_addr, node->size)) return 0; - else if (!access_ok(VERIFY_WRITE, (void __user *)a, - m->memory_size)) + + + if (!access_ok(VERIFY_WRITE, (void __user *)a, + node->size)) return 0; else if (log_all && !log_access_ok(log_base, - m->guest_phys_addr, - m->memory_size)) + node->start, + node->size)) return 0; } return 1; @@ -602,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ -static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, +static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, int log_all) { int i; @@ -615,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) - ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log); + ok = vq_memory_access_ok(d->vqs[i]->log_base, + umem, log); else ok = 1; mutex_unlock(&d->vqs[i]->mutex); @@ -625,12 +716,388 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } +static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct iovec iov[], int iov_size, int access); + +static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to, + const void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_to_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that all vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter t; + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_WO); + if (ret < 0) + goto out; + iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size); + ret = copy_to_iter(from, size, &t); + if (ret == size) + ret = 0; + } +out: + return ret; +} + +static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, + void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_from_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter f; + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", from, + (unsigned long long) size); + goto out; + } + iov_iter_init(&f, READ, vq->iotlb_iov, ret, size); + ret = copy_from_iter(to, size, &f); + if (ret == size) + ret = 0; + } + +out: + return ret; +} + +static void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned size) +{ + int ret; + + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { + vq_err(vq, "Non atomic userspace memory access: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + return vq->iotlb_iov[0].iov_base; +} + +#define vhost_put_user(vq, x, ptr) \ +({ \ + int ret = -EFAULT; \ + if (!vq->iotlb) { \ + ret = __put_user(x, ptr); \ + } else { \ + __typeof__(ptr) to = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (to != NULL) \ + ret = __put_user(x, to); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +#define vhost_get_user(vq, x, ptr) \ +({ \ + int ret; \ + if (!vq->iotlb) { \ + ret = __get_user(x, ptr); \ + } else { \ + __typeof__(ptr) from = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (from != NULL) \ + ret = __get_user(x, from); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +static void vhost_dev_lock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_lock_nested(&d->vqs[i]->mutex, i); +} + +static void vhost_dev_unlock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_unlock(&d->vqs[i]->mutex); +} + +static int vhost_new_umem_range(struct vhost_umem *umem, + u64 start, u64 size, u64 end, + u64 userspace_addr, int perm) +{ + struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + + if (!node) + return -ENOMEM; + + if (umem->numem == max_iotlb_entries) { + tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); + vhost_umem_free(umem, tmp); + } + + node->start = start; + node->size = size; + node->last = end; + node->userspace_addr = userspace_addr; + node->perm = perm; + INIT_LIST_HEAD(&node->link); + list_add_tail(&node->link, &umem->umem_list); + vhost_umem_interval_tree_insert(node, &umem->umem_tree); + umem->numem++; + + return 0; +} + +static void vhost_del_umem_range(struct vhost_umem *umem, + u64 start, u64 end) +{ + struct vhost_umem_node *node; + + while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + start, end))) + vhost_umem_free(umem, node); +} + +static void vhost_iotlb_notify_vq(struct vhost_dev *d, + struct vhost_iotlb_msg *msg) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&d->iotlb_lock); + + list_for_each_entry_safe(node, n, &d->pending_list, node) { + struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; + if (msg->iova <= vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && + vq_msg->type == VHOST_IOTLB_MISS) { + vhost_poll_queue(&node->vq->poll); + list_del(&node->node); + kfree(node); + } + } + + spin_unlock(&d->iotlb_lock); +} + +static int umem_access_ok(u64 uaddr, u64 size, int access) +{ + unsigned long a = uaddr; + + /* Make sure 64 bit math will not overflow. */ + if (vhost_overflow(uaddr, size)) + return -EFAULT; + + if ((access & VHOST_ACCESS_RO) && + !access_ok(VERIFY_READ, (void __user *)a, size)) + return -EFAULT; + if ((access & VHOST_ACCESS_WO) && + !access_ok(VERIFY_WRITE, (void __user *)a, size)) + return -EFAULT; + return 0; +} + +int vhost_process_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *msg) +{ + int ret = 0; + + mutex_lock(&dev->mutex); + vhost_dev_lock_vqs(dev); + switch (msg->type) { + case VHOST_IOTLB_UPDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } + if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) { + ret = -EFAULT; + break; + } + if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, + msg->iova + msg->size - 1, + msg->uaddr, msg->perm)) { + ret = -ENOMEM; + break; + } + vhost_iotlb_notify_vq(dev, msg); + break; + case VHOST_IOTLB_INVALIDATE: + vhost_del_umem_range(dev->iotlb, msg->iova, + msg->iova + msg->size - 1); + break; + default: + ret = -EINVAL; + break; + } + + vhost_dev_unlock_vqs(dev); + mutex_unlock(&dev->mutex); + + return ret; +} +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from) +{ + struct vhost_msg_node node; + unsigned size = sizeof(struct vhost_msg); + size_t ret; + int err; + + if (iov_iter_count(from) < size) + return 0; + ret = copy_from_iter(&node.msg, size, from); + if (ret != size) + goto done; + + switch (node.msg.type) { + case VHOST_IOTLB_MSG: + err = vhost_process_iotlb_msg(dev, &node.msg.iotlb); + if (err) + ret = err; + break; + default: + ret = -EINVAL; + break; + } + +done: + return ret; +} +EXPORT_SYMBOL(vhost_chr_write_iter); + +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &dev->wait, wait); + + if (!list_empty(&dev->read_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} +EXPORT_SYMBOL(vhost_chr_poll); + +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock) +{ + DEFINE_WAIT(wait); + struct vhost_msg_node *node; + ssize_t ret = 0; + unsigned size = sizeof(struct vhost_msg); + + if (iov_iter_count(to) < size) + return 0; + + while (1) { + if (!noblock) + prepare_to_wait(&dev->wait, &wait, + TASK_INTERRUPTIBLE); + + node = vhost_dequeue_msg(dev, &dev->read_list); + if (node) + break; + if (noblock) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!dev->iotlb) { + ret = -EBADFD; + break; + } + + schedule(); + } + + if (!noblock) + finish_wait(&dev->wait, &wait); + + if (node) { + ret = copy_to_iter(&node->msg, size, to); + + if (ret != size || node->msg.type != VHOST_IOTLB_MISS) { + kfree(node); + return ret; + } + + vhost_enqueue_msg(dev, &dev->pending_list, node); + } + + return ret; +} +EXPORT_SYMBOL_GPL(vhost_chr_read_iter); + +static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) +{ + struct vhost_dev *dev = vq->dev; + struct vhost_msg_node *node; + struct vhost_iotlb_msg *msg; + + node = vhost_new_msg(vq, VHOST_IOTLB_MISS); + if (!node) + return -ENOMEM; + + msg = &node->msg.iotlb; + msg->type = VHOST_IOTLB_MISS; + msg->iova = iova; + msg->perm = access; + + vhost_enqueue_msg(dev, &dev->read_list, node); + + return 0; +} + static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) + { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, sizeof *avail + num * sizeof *avail->ring + s) && @@ -638,11 +1105,59 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static int iotlb_access_ok(struct vhost_virtqueue *vq, + int access, u64 addr, u64 len) +{ + const struct vhost_umem_node *node; + struct vhost_umem *umem = vq->iotlb; + u64 s = 0, size; + + while (len > s) { + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, + addr + len - 1); + if (node == NULL || node->start > addr) { + vhost_iotlb_miss(vq, addr, access); + return false; + } else if (!(node->perm & access)) { + /* Report the possible access violation by + * request another translation from userspace. + */ + return false; + } + + size = node->size - addr + node->start; + s += size; + addr += size; + } + + return true; +} + +int vq_iotlb_prefetch(struct vhost_virtqueue *vq) +{ + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + unsigned int num = vq->num; + + if (!vq->iotlb) + return 1; + + return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, + num * sizeof *vq->desc) && + iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, + sizeof *vq->avail + + num * sizeof *vq->avail->ring + s) && + iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, + sizeof *vq->used + + num * sizeof *vq->used->ring + s); +} +EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); + /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); @@ -653,7 +1168,7 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return vq_memory_access_ok(log_base, vq->memory, + return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -664,33 +1179,36 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); + if (!vq_log_access_ok(vq, vq->log_base)) + return 0; + + /* Access validation occurs at prefetch time with IOTLB */ + if (vq->iotlb) + return 1; + + return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); -static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) +static struct vhost_umem *vhost_umem_alloc(void) { - const struct vhost_memory_region *r1 = p1, *r2 = p2; - if (r1->guest_phys_addr < r2->guest_phys_addr) - return 1; - if (r1->guest_phys_addr > r2->guest_phys_addr) - return -1; - return 0; -} + struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem)); -static void *vhost_kvzalloc(unsigned long size) -{ - void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!umem) + return NULL; - if (!n) - n = vzalloc(size); - return n; + umem->umem_tree = RB_ROOT; + umem->numem = 0; + INIT_LIST_HEAD(&umem->umem_list); + + return umem; } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { - struct vhost_memory mem, *newmem, *oldmem; + struct vhost_memory mem, *newmem; + struct vhost_memory_region *region; + struct vhost_umem *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; @@ -710,24 +1228,47 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kvfree(newmem); return -EFAULT; } - sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions), - vhost_memory_reg_sort_cmp, NULL); - if (!memory_access_ok(d, newmem, 0)) { + newumem = vhost_umem_alloc(); + if (!newumem) { kvfree(newmem); - return -EFAULT; + return -ENOMEM; } - oldmem = d->memory; - d->memory = newmem; + + for (region = newmem->regions; + region < newmem->regions + mem.nregions; + region++) { + if (vhost_new_umem_range(newumem, + region->guest_phys_addr, + region->memory_size, + region->guest_phys_addr + + region->memory_size - 1, + region->userspace_addr, + VHOST_ACCESS_RW)) + goto err; + } + + if (!memory_access_ok(d, newumem, 0)) + goto err; + + oldumem = d->umem; + d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); - d->vqs[i]->memory = newmem; + d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } - kvfree(oldmem); + + kvfree(newmem); + vhost_umem_clean(oldumem); return 0; + +err: + vhost_umem_clean(newumem); + kvfree(newmem); + return -EFAULT; } long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) @@ -913,6 +1454,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; + case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: + if (copy_from_user(&s, argp, sizeof(s))) { + r = -EFAULT; + break; + } + vq->busyloop_timeout = s.num; + break; + case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: + s.index = idx; + s.num = vq->busyloop_timeout; + if (copy_to_user(argp, &s, sizeof(s))) + r = -EFAULT; + break; default: r = -ENOIOCTLCMD; } @@ -936,6 +1490,30 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) +{ + struct vhost_umem *niotlb, *oiotlb; + int i; + + niotlb = vhost_umem_alloc(); + if (!niotlb) + return -ENOMEM; + + oiotlb = d->iotlb; + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->iotlb = niotlb; + mutex_unlock(&d->vqs[i]->mutex); + } + + vhost_umem_clean(oiotlb); + + return 0; +} +EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); + /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { @@ -1018,28 +1596,6 @@ done: } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); -static const struct vhost_memory_region *find_region(struct vhost_memory *mem, - __u64 addr, __u32 len) -{ - const struct vhost_memory_region *reg; - int start = 0, end = mem->nregions; - - while (start < end) { - int slot = start + (end - start) / 2; - reg = mem->regions + slot; - if (addr >= reg->guest_phys_addr) - end = slot; - else - start = slot + 1; - } - - reg = mem->regions + start; - if (addr >= reg->guest_phys_addr && - reg->guest_phys_addr + reg->memory_size > addr) - return reg; - return NULL; -} - /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/x86/exception-tables.txt. @@ -1118,7 +1674,8 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), + &vq->used->flags) < 0) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ @@ -1136,7 +1693,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq))) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), + vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; @@ -1153,62 +1711,84 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) return 0; } -int vhost_init_used(struct vhost_virtqueue *vq) +int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq->private_data) { - vq->is_le = virtio_legacy_is_little_endian(); + bool is_le = vq->is_le; + + if (!vq->private_data) return 0; - } vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) - return r; + goto err; vq->signalled_used_valid = false; - if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) - return -EFAULT; - r = __get_user(last_used_idx, &vq->used->idx); - if (r) - return r; + if (!vq->iotlb && + !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { + r = -EFAULT; + goto err; + } + r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + if (r) { + vq_err(vq, "Can't access used idx at %p\n", + &vq->used->idx); + goto err; + } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; + +err: + vq->is_le = is_le; + return r; } -EXPORT_SYMBOL_GPL(vhost_init_used); +EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, - struct iovec iov[], int iov_size) + struct iovec iov[], int iov_size, int access) { - const struct vhost_memory_region *reg; - struct vhost_memory *mem; + const struct vhost_umem_node *node; + struct vhost_dev *dev = vq->dev; + struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0; int ret = 0; - mem = vq->memory; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } - reg = find_region(mem, addr, len); - if (unlikely(!reg)) { - ret = -EFAULT; + + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, addr + len - 1); + if (node == NULL || node->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; + break; + } + ret = -EAGAIN; + break; + } else if (!(node->perm & access)) { + ret = -EPERM; break; } + _iov = iov + ret; - size = reg->memory_size - addr + reg->guest_phys_addr; + size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) - (reg->userspace_addr + addr - reg->guest_phys_addr); + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret; } + if (ret == -EAGAIN) + vhost_iotlb_miss(vq, addr, access); return ret; } @@ -1243,7 +1823,7 @@ static int get_indirect(struct vhost_virtqueue *vq, unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; - int ret; + int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { @@ -1255,9 +1835,10 @@ static int get_indirect(struct vhost_virtqueue *vq, } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, - UIO_MAXIOV); + UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d in indirect.\n", ret); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, READ, vq->indirect, ret, len); @@ -1295,16 +1876,22 @@ static int get_indirect(struct vhost_virtqueue *vq, return -EINVAL; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d indirect idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); return ret; } /* If this is an input descriptor, increment that count. */ - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); @@ -1343,11 +1930,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, u16 last_avail_idx; __virtio16 avail_idx; __virtio16 ring_head; - int ret; + int ret, access; /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(__get_user(avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1369,8 +1956,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(__get_user(ring_head, - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { + if (unlikely(vhost_get_user(vq, ring_head, + &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1405,7 +1992,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, i, vq->num, head); return -EINVAL; } - ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); + ret = vhost_copy_from_user(vq, &desc, vq->desc + i, + sizeof desc); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); @@ -1416,22 +2004,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { - vq_err(vq, "Failure detected " - "in indirect descriptor at idx %d\n", i); + if (ret != -EAGAIN) + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); return ret; } continue; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d descriptor idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); return ret; } - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; @@ -1493,15 +2087,15 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (count == 1) { - if (__put_user(heads[0].id, &used->id)) { + if (vhost_put_user(vq, heads[0].id, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (__put_user(heads[0].len, &used->len)) { + if (vhost_put_user(vq, heads[0].len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } - } else if (__copy_to_user(used, heads, count * sizeof *used)) { + } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1545,11 +2139,14 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure buffer is written before we update index. */ smp_wmb(); - if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) { + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), + &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } if (unlikely(vq->log_used)) { + /* Make sure used idx is seen before log. */ + smp_wmb(); /* Log used index update. */ log_write(vq->log_base, vq->log_addr + offsetof(struct vring_used, idx), @@ -1577,7 +2174,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; - if (__get_user(flags, &vq->avail->flags)) { + if (vhost_get_user(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -1591,7 +2188,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (unlikely(!v)) return true; - if (__get_user(event, vhost_used_event(vq))) { + if (vhost_get_user(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -1627,6 +2224,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); +/* return true if we're sure that avaiable ring is empty */ +bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __virtio16 avail_idx; + int r; + + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + if (r) + return false; + + return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx; +} +EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); + /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -1654,7 +2265,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); @@ -1682,6 +2293,50 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_disable_notify); +/* Create a new message. */ +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) +{ + struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + if (!node) + return NULL; + + /* Make sure all padding within the structure is initialized. */ + memset(&node->msg, 0, sizeof node->msg); + node->vq = vq; + node->msg.type = type; + return node; +} +EXPORT_SYMBOL_GPL(vhost_new_msg); + +void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, + struct vhost_msg_node *node) +{ + spin_lock(&dev->iotlb_lock); + list_add_tail(&node->node, head); + spin_unlock(&dev->iotlb_lock); + + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); +} +EXPORT_SYMBOL_GPL(vhost_enqueue_msg); + +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head) +{ + struct vhost_msg_node *node = NULL; + + spin_lock(&dev->iotlb_lock); + if (!list_empty(head)) { + node = list_first_entry(head, struct vhost_msg_node, + node); + list_del(&node->node); + } + spin_unlock(&dev->iotlb_lock); + + return node; +} +EXPORT_SYMBOL_GPL(vhost_dequeue_msg); + + static int __init vhost_init(void) { return 0; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d3f767448a72..78f3c5fc02e4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -15,13 +15,15 @@ struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); +#define VHOST_WORK_QUEUED 1 struct vhost_work { - struct list_head node; + struct llist_node node; vhost_work_fn_t fn; wait_queue_head_t done; int flushing; unsigned queue_seq; unsigned done_seq; + unsigned long flags; }; /* Poll a file (eventfd or socket) */ @@ -37,6 +39,7 @@ struct vhost_poll { void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); +bool vhost_has_work(struct vhost_dev *dev); void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev); @@ -52,6 +55,27 @@ struct vhost_log { u64 len; }; +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +struct vhost_umem_node { + struct rb_node rb; + struct list_head link; + __u64 start; + __u64 last; + __u64 size; + __u64 userspace_addr; + __u32 perm; + __u32 flags_padding; + __u64 __subtree_last; +}; + +struct vhost_umem { + struct rb_root umem_tree; + struct list_head umem_list; + int numem; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -97,10 +121,12 @@ struct vhost_virtqueue { u64 log_addr; struct iovec iov[UIO_MAXIOV]; + struct iovec iotlb_iov[64]; struct iovec *indirect; struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ - struct vhost_memory *memory; + struct vhost_umem *umem; + struct vhost_umem *iotlb; void *private_data; u64 acked_features; /* Log write descriptors */ @@ -114,27 +140,38 @@ struct vhost_virtqueue { /* Ring endianness requested by userspace for cross-endian support. */ bool user_be; #endif + u32 busyloop_timeout; +}; + +struct vhost_msg_node { + struct vhost_msg msg; + struct vhost_virtqueue *vq; + struct list_head node; }; struct vhost_dev { - struct vhost_memory *memory; struct mm_struct *mm; struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; - spinlock_t work_lock; - struct list_head work_list; + struct llist_head work_list; struct task_struct *worker; + struct vhost_umem *umem; + struct vhost_umem *iotlb; + spinlock_t iotlb_lock; + struct list_head read_list; + struct list_head pending_list; + wait_queue_head_t wait; }; void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); -struct vhost_memory *vhost_dev_reset_owner_prepare(void); -void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); +struct vhost_umem *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); @@ -148,7 +185,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); -int vhost_init_used(struct vhost_virtqueue *); +int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); @@ -158,10 +195,26 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); +int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); +void vhost_enqueue_msg(struct vhost_dev *dev, + struct list_head *head, + struct vhost_msg_node *node); +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head); +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait); +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock); +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c new file mode 100644 index 000000000000..72e914de473e --- /dev/null +++ b/drivers/vhost/vsock.c @@ -0,0 +1,797 @@ +/* + * vhost transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "vhost.h" + +#define VHOST_VSOCK_DEFAULT_HOST_CID 2 + +enum { + VHOST_VSOCK_FEATURES = VHOST_FEATURES, +}; + +/* Used to track all the vhost_vsock instances on the system. */ +static DEFINE_SPINLOCK(vhost_vsock_lock); +static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); + +struct vhost_vsock { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + + /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + struct hlist_node hash; + + struct vhost_work send_pkt_work; + spinlock_t send_pkt_list_lock; + struct list_head send_pkt_list; /* host->guest pending packets */ + + atomic_t queued_replies; + + u32 guest_cid; +}; + +static u32 vhost_transport_get_local_cid(void) +{ + return VHOST_VSOCK_DEFAULT_HOST_CID; +} + +/* Callers that dereference the return value must hold vhost_vsock_lock or the + * RCU read lock. + */ +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +{ + struct vhost_vsock *vsock; + + hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { + u32 other_cid = vsock->guest_cid; + + /* Skip instances that have no CID yet */ + if (other_cid == 0) + continue; + + if (other_cid == guest_cid) { + return vsock; + } + } + + return NULL; +} + +static void +vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + struct vhost_virtqueue *vq) +{ + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + bool added = false; + bool restart_tx = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + /* Avoid further vmexits, we're already processing the virtqueue */ + vhost_disable_notify(&vsock->dev, vq); + + for (;;) { + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + unsigned out, in; + size_t nbytes; + size_t len; + int head; + + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); + vhost_enable_notify(&vsock->dev, vq); + break; + } + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + if (head == vq->num) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + /* We cannot finish yet if more buffers snuck in while + * re-enabling notify. + */ + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + if (out) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Expected 0 output buffers, got %u\n", out); + break; + } + + len = iov_length(&vq->iov[out], in); + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + + nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt hdr\n"); + break; + } + + nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } + + vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + added = true; + + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx processing? */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); + + if (restart_tx) + vhost_poll_queue(&tx_vq->poll); +} + +static void vhost_transport_send_pkt_work(struct vhost_work *work) +{ + struct vhost_virtqueue *vq; + struct vhost_vsock *vsock; + + vsock = container_of(work, struct vhost_vsock, send_pkt_work); + vq = &vsock->vqs[VSOCK_VQ_RX]; + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int +vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct vhost_vsock *vsock; + struct vhost_virtqueue *vq; + int len = pkt->len; + + rcu_read_lock(); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); + if (!vsock) { + rcu_read_unlock(); + virtio_transport_free_pkt(pkt); + return -ENODEV; + } + + vq = &vsock->vqs[VSOCK_VQ_RX]; + + if (pkt->reply) + atomic_inc(&vsock->queued_replies); + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add_tail(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); + return len; +} + +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + int ret = -ENODEV; + LIST_HEAD(freeme); + + rcu_read_lock(); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + goto out; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +static struct virtio_vsock_pkt * +vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, + unsigned int out, unsigned int in) +{ + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + size_t nbytes; + size_t len; + + if (in != 0) { + vq_err(vq, "Expected 0 input buffers, got %u\n", in); + return NULL; + } + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + len = iov_length(vq->iov, out); + iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); + + nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", + sizeof(pkt->hdr), nbytes); + kfree(pkt); + return NULL; + } + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) + pkt->len = le32_to_cpu(pkt->hdr.len); + + /* No payload */ + if (!pkt->len) + return pkt; + + /* The pkt is too big */ + if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + kfree(pkt); + return NULL; + } + + pkt->buf = kmalloc(pkt->len, GFP_KERNEL); + if (!pkt->buf) { + kfree(pkt); + return NULL; + } + + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + vq_err(vq, "Expected %u byte payload, got %zu bytes\n", + pkt->len, nbytes); + virtio_transport_free_pkt(pkt); + return NULL; + } + + return pkt; +} + +/* Is there space left for replies to rx packets? */ +static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; + int val; + + smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ + val = atomic_read(&vsock->queued_replies); + + return val < vq->num; +} + +static void vhost_vsock_handle_tx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + struct virtio_vsock_pkt *pkt; + int head; + unsigned int out, in; + bool added = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + vhost_disable_notify(&vsock->dev, vq); + for (;;) { + u32 len; + + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. Leave tx virtqueue + * callbacks disabled. + */ + goto no_more_replies; + } + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) + break; + + if (head == vq->num) { + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + pkt = vhost_vsock_alloc_pkt(vq, out, in); + if (!pkt) { + vq_err(vq, "Faulted on pkt\n"); + continue; + } + + len = pkt->len; + + /* Only accept correctly addressed packets */ + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); + + vhost_add_used(vq, head, sizeof(pkt->hdr) + len); + added = true; + } + +no_more_replies: + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); +} + +static void vhost_vsock_handle_rx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int vhost_vsock_start(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq; + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + + if (!vhost_vq_access_ok(vq)) { + ret = -EFAULT; + goto err_vq; + } + + if (!vq->private_data) { + vq->private_data = vsock; + ret = vhost_vq_init_access(vq); + if (ret) + goto err_vq; + } + + mutex_unlock(&vq->mutex); + } + + mutex_unlock(&vsock->dev.mutex); + return 0; + +err_vq: + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static int vhost_vsock_stop(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } + +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static void vhost_vsock_free(struct vhost_vsock *vsock) +{ + kvfree(vsock); +} + +static int vhost_vsock_dev_open(struct inode *inode, struct file *file) +{ + struct vhost_virtqueue **vqs; + struct vhost_vsock *vsock; + int ret; + + /* This struct is large and allocation could fail, fall back to vmalloc + * if there is no other way. + */ + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!vsock) { + vsock = vmalloc(sizeof(*vsock)); + if (!vsock) + return -ENOMEM; + } + + vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + ret = -ENOMEM; + goto out; + } + + vsock->guest_cid = 0; /* no CID assigned yet */ + + atomic_set(&vsock->queued_replies, 0); + + vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; + vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; + vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; + vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; + + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + + file->private_data = vsock; + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); + return 0; + +out: + vhost_vsock_free(vsock); + return ret; +} + +static void vhost_vsock_flush(struct vhost_vsock *vsock) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) + if (vsock->vqs[i].handle_kick) + vhost_poll_flush(&vsock->vqs[i].poll); + vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); +} + +static void vhost_vsock_reset_orphans(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + + /* If the peer is still valid, no need to reset connection */ + if (vhost_vsock_get(vsk->remote_addr.svm_cid)) + return; + + /* If the close timeout is pending, let it expire. This avoids races + * with the timeout callback. + */ + if (vsk->close_work_scheduled) + return; + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); +} + +static int vhost_vsock_dev_release(struct inode *inode, struct file *file) +{ + struct vhost_vsock *vsock = file->private_data; + + spin_lock_bh(&vhost_vsock_lock); + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + spin_unlock_bh(&vhost_vsock_lock); + + /* Wait for other CPUs to finish using vsock */ + synchronize_rcu(); + + /* Iterating over all connections for all CIDs to find orphans is + * inefficient. Room for improvement here. */ + vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + + vhost_vsock_stop(vsock); + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + + spin_lock_bh(&vsock->send_pkt_list_lock); + while (!list_empty(&vsock->send_pkt_list)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_dev_cleanup(&vsock->dev, false); + kfree(vsock->dev.vqs); + vhost_vsock_free(vsock); + return 0; +} + +static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) +{ + struct vhost_vsock *other; + + /* Refuse reserved CIDs */ + if (guest_cid <= VMADDR_CID_HOST || + guest_cid == U32_MAX) + return -EINVAL; + + /* 64-bit CIDs are not yet supported */ + if (guest_cid > U32_MAX) + return -EINVAL; + + /* Refuse if CID is already in use */ + spin_lock_bh(&vhost_vsock_lock); + other = vhost_vsock_get(guest_cid); + if (other && other != vsock) { + spin_unlock_bh(&vhost_vsock_lock); + return -EADDRINUSE; + } + + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + + vsock->guest_cid = guest_cid; + hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); + spin_unlock_bh(&vhost_vsock_lock); + + return 0; +} + +static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + if (features & ~VHOST_VSOCK_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vsock->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vsock->dev)) { + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&vsock->dev.mutex); + return 0; +} + +static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_vsock *vsock = f->private_data; + void __user *argp = (void __user *)arg; + u64 guest_cid; + u64 features; + int start; + int r; + + switch (ioctl) { + case VHOST_VSOCK_SET_GUEST_CID: + if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) + return -EFAULT; + return vhost_vsock_set_cid(vsock, guest_cid); + case VHOST_VSOCK_SET_RUNNING: + if (copy_from_user(&start, argp, sizeof(start))) + return -EFAULT; + if (start) + return vhost_vsock_start(vsock); + else + return vhost_vsock_stop(vsock); + case VHOST_GET_FEATURES: + features = VHOST_VSOCK_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + return vhost_vsock_set_features(vsock, features); + default: + mutex_lock(&vsock->dev.mutex); + r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); + if (r == -ENOIOCTLCMD) + r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); + else + vhost_vsock_flush(vsock); + mutex_unlock(&vsock->dev.mutex); + return r; + } +} + +static const struct file_operations vhost_vsock_fops = { + .owner = THIS_MODULE, + .open = vhost_vsock_dev_open, + .release = vhost_vsock_dev_release, + .llseek = noop_llseek, + .unlocked_ioctl = vhost_vsock_dev_ioctl, +}; + +static struct miscdevice vhost_vsock_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhost-vsock", + .fops = &vhost_vsock_fops, +}; + +static struct virtio_transport vhost_transport = { + .transport = { + .get_local_cid = vhost_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, + + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_bind = virtio_transport_dgram_bind, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = vhost_transport_send_pkt, +}; + +static int __init vhost_vsock_init(void) +{ + int ret; + + ret = vsock_core_init(&vhost_transport.transport); + if (ret < 0) + return ret; + return misc_register(&vhost_vsock_misc); +}; + +static void __exit vhost_vsock_exit(void) +{ + misc_deregister(&vhost_vsock_misc); + vsock_core_exit(); +}; + +module_init(vhost_vsock_init); +module_exit(vhost_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 4e3c78d88832..c03c5b9602bb 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -3032,7 +3032,7 @@ static int fbcon_fb_unbind(int idx) for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] != idx && con2fb_map[i] != -1) { - new_idx = i; + new_idx = con2fb_map[i]; break; } } diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index 649b32f78c08..c55109524fd5 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -287,14 +287,17 @@ static int clps711x_fb_probe(struct platform_device *pdev) } ret = of_get_fb_videomode(disp, &cfb->mode, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + of_node_put(disp); goto out_fb_release; + } of_property_read_u32(disp, "ac-prescale", &cfb->ac_prescale); cfb->cmap_invert = of_property_read_bool(disp, "cmap-invert"); ret = of_property_read_u32(disp, "bits-per-pixel", &info->var.bits_per_pixel); + of_node_put(disp); if (ret) goto out_fb_release; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index c682f441282a..f5088bdbf8ee 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -426,6 +426,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image, { unsigned int x; + if (image->width > info->var.xres || image->height > info->var.yres) + return; + if (rotate == FB_ROTATE_UR) { for (x = 0; x < num && image->dx + image->width <= info->var.xres; @@ -434,7 +437,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image, image->dx += image->width + 8; } } else if (rotate == FB_ROTATE_UD) { - for (x = 0; x < num; x++) { + u32 dx = image->dx; + + for (x = 0; x < num && image->dx <= dx; x++) { info->fbops->fb_imageblit(info, image); image->dx -= image->width + 8; } @@ -446,7 +451,9 @@ static void fb_do_show_logo(struct fb_info *info, struct fb_image *image, image->dy += image->height + 8; } } else if (rotate == FB_ROTATE_CCW) { - for (x = 0; x < num; x++) { + u32 dy = image->dy; + + for (x = 0; x < num && image->dy <= dy; x++) { info->fbops->fb_imageblit(info, image); image->dy -= image->height + 8; } diff --git a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c index 195ad7cac1ba..68fa037d8cbc 100644 --- a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c +++ b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c @@ -372,7 +372,7 @@ static int Ti3026_init(struct matrox_fb_info *minfo, struct my_timming *m) DBG(__func__) - memcpy(hw->DACreg, MGADACbpp32, sizeof(hw->DACreg)); + memcpy(hw->DACreg, MGADACbpp32, sizeof(MGADACbpp32)); switch (minfo->fbcon.var.bits_per_pixel) { case 4: hw->DACreg[POS3026_XLATCHCTRL] = TVP3026_XLATCHCTRL_16_1; /* or _8_1, they are same */ hw->DACreg[POS3026_XTRUECOLORCTRL] = TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR; diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index 34ab4f950f0a..0c1c34ff40a9 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -609,6 +609,8 @@ int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg) int r = 0; + memset(&p, 0, sizeof(p)); + switch (cmd) { case OMAPFB_SYNC_GFX: DBG("ioctl SYNC_GFX\n"); diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 0e24eb9c219c..750a384bf191 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -687,7 +687,7 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, return -ENOMEM; ret = get_user_pages_unlocked(current, current->mm, (unsigned long)buf, - nr_pages, WRITE, 0, pages); + nr_pages, pages, FOLL_WRITE); if (ret < nr_pages) { nr_pages = ret; diff --git a/drivers/video/rockchip/rga/rga_mmu_info.c b/drivers/video/rockchip/rga/rga_mmu_info.c index 966b7d1d0864..363ba9ebd1ca 100644 --- a/drivers/video/rockchip/rga/rga_mmu_info.c +++ b/drivers/video/rockchip/rga/rga_mmu_info.c @@ -329,6 +329,10 @@ static int rga_MapUserMemory(struct page **pages, do { down_read(¤t->mm->mmap_sem); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + result = get_user_pages(current, current->mm, Memory << PAGE_SHIFT, + pageCount, FOLL_WRITE, pages, NULL); +#else result = get_user_pages(current, current->mm, Memory << PAGE_SHIFT, @@ -338,6 +342,7 @@ static int rga_MapUserMemory(struct page **pages, pages, NULL ); +#endif up_read(¤t->mm->mmap_sem); #if 0 diff --git a/drivers/video/rockchip/rga2/rga2_mmu_info.c b/drivers/video/rockchip/rga2/rga2_mmu_info.c index 8619a85b9611..63337753d578 100644 --- a/drivers/video/rockchip/rga2/rga2_mmu_info.c +++ b/drivers/video/rockchip/rga2/rga2_mmu_info.c @@ -426,7 +426,11 @@ static int rga2_MapUserMemory(struct page **pages, uint32_t *pageTable, status = 0; Address = 0; down_read(¤t->mm->mmap_sem); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 168) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + result = get_user_pages(current, current->mm, Memory << PAGE_SHIFT, + pageCount, writeFlag ? FOLL_WRITE : 0, + pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) result = get_user_pages(current, current->mm, Memory << PAGE_SHIFT, pageCount, writeFlag, 0, pages, NULL); #else diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 32c8fc5f7a5c..590a0f51a249 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -246,8 +246,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) down_read(¤t->mm->mmap_sem); num_pinned = get_user_pages(current, current->mm, param.local_vaddr - lb_offset, num_pages, - (param.source == -1) ? READ : WRITE, - 0, pages, NULL); + (param.source == -1) ? 0 : FOLL_WRITE, + pages, NULL); up_read(¤t->mm->mmap_sem); if (num_pinned != num_pages) { diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index cab9f3f63a38..77590320d44c 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -60,7 +60,7 @@ config VIRTIO_INPUT config VIRTIO_MMIO tristate "Platform bus driver for memory mapped virtio devices" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA select VIRTIO ---help--- This drivers provides support for memory mapped virtio diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index cbe9e2295752..e3ac3e157485 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -396,7 +396,7 @@ static int init_vqs(struct virtio_balloon *vb) { struct virtqueue *vqs[3]; vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - const char *names[] = { "inflate", "deflate", "stats" }; + static const char * const names[] = { "inflate", "deflate", "stats" }; int err, nvqs; /* diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index c96944b59856..350a2a5a49db 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) struct virtqueue *vqs[2]; vq_callback_t *cbs[] = { virtinput_recv_events, virtinput_recv_status }; - static const char *names[] = { "events", "status" }; + static const char * const names[] = { "events", "status" }; int err; err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index f499d9da7237..745c6ee1bb3e 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -482,7 +482,7 @@ error_available: static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 2046a68ad0ba..f6bed86c17f9 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[], + const char * const names[], bool use_msix, bool per_vq_vectors) { @@ -376,7 +376,7 @@ error_find: int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { int err; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index b976d968e793..2cc252270b2d 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]); + const char * const names[]); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4469202eaa8e..631021cfc740 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -423,7 +423,7 @@ err_new_queue: static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]) + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a01a41a41269..761f28ffd40e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ @@ -54,6 +56,11 @@ #define END_USE(vq) #endif +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + struct vring_virtqueue { struct virtqueue vq; @@ -98,12 +105,131 @@ struct vring_virtqueue { ktime_t last_add_time; #endif - /* Tokens for callbacks. */ - void *data[]; + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; }; #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +/* + * Modern virtio devices have feature bits to specify whether they need a + * quirk and bypass the IOMMU. If not there, just use the DMA API. + * + * If there, the interaction between virtio and DMA API is messy. + * + * On most systems with virtio, physical addresses match bus addresses, + * and it doesn't particularly matter whether we use the DMA API. + * + * On some systems, including Xen and any system with a physical device + * that speaks virtio behind a physical IOMMU, we must use the DMA API + * for virtio DMA to work at all. + * + * On other systems, including SPARC and PPC64, virtio-pci devices are + * enumerated as though they are behind an IOMMU, but the virtio host + * ignores the IOMMU, so we must either pretend that the IOMMU isn't + * there or somehow map everything as the identity. + * + * For the time being, we preserve historic behavior and bypass the DMA + * API. + * + * TODO: install a per-device DMA ops structure that does the right thing + * taking into account all the above quirks, and use the DMA API + * unconditionally on data path. + */ + +static bool vring_use_dma_api(struct virtio_device *vdev) +{ + if (!virtio_has_iommu_quirk(vdev)) + return true; + + /* Otherwise, we are left to guess. */ + /* + * In theory, it's possible to have a buggy QEMU-supposed + * emulated Q35 IOMMU and Xen enabled at the same time. On + * such a configuration, virtio has never worked and will + * not work without an even larger kludge. Instead, enable + * the DMA API if we're a Xen guest, which at least allows + * all of the sensible Xen configurations to work correctly. + */ + if (xen_domain()) + return true; + + return false; +} + +/* + * The DMA ops on various arches are rather gnarly right now, and + * making all of the arch DMA ops work on the vring device itself + * is a mess. For now, we use the parent device for DMA ops. + */ +struct device *vring_dma_dev(const struct vring_virtqueue *vq) +{ + return vq->vq.vdev->dev.parent; +} + +/* Map one sg entry. */ +static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, + struct scatterlist *sg, + enum dma_data_direction direction) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return (dma_addr_t)sg_phys(sg); + + /* + * We can't use dma_map_sg, because we don't use scatterlists in + * the way it expects (we don't guarantee that the scatterlist + * will exist for the lifetime of the mapping). + */ + return dma_map_page(vring_dma_dev(vq), + sg_page(sg), sg->offset, sg->length, + direction); +} + +static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return (dma_addr_t)virt_to_phys(cpu_addr); + + return dma_map_single(vring_dma_dev(vq), + cpu_addr, size, direction); +} + +static void vring_unmap_one(const struct vring_virtqueue *vq, + struct vring_desc *desc) +{ + u16 flags; + + if (!vring_use_dma_api(vq->vq.vdev)) + return; + + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); + + if (flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) +{ + if (!vring_use_dma_api(vq->vq.vdev)) + return 0; + + return dma_mapping_error(vring_dma_dev(vq), addr); +} + static struct vring_desc *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) { @@ -137,7 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; struct vring_desc *desc; - unsigned int i, n, avail, descs_used, uninitialized_var(prev); + unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; int head; bool indirect; @@ -177,21 +303,15 @@ static inline int virtqueue_add(struct virtqueue *_vq, if (desc) { /* Use a single buffer which doesn't continue */ - vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); - vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc)); - /* avoid kmemleak false positive (hidden by virt_to_phys) */ - kmemleak_ignore(desc); - vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); - + indirect = true; /* Set up rest to use this indirect table. */ i = 0; descs_used = 1; - indirect = true; } else { + indirect = false; desc = vq->vring.desc; i = head; descs_used = total_sg; - indirect = false; } if (vq->vq.num_free < descs_used) { @@ -208,13 +328,14 @@ static inline int virtqueue_add(struct virtqueue *_vq, return -ENOSPC; } - /* We're about to use some buffers from the free list. */ - vq->vq.num_free -= descs_used; - for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); - desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; i = virtio16_to_cpu(_vq->vdev, desc[i].next); @@ -222,8 +343,12 @@ static inline int virtqueue_add(struct virtqueue *_vq, } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); - desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; i = virtio16_to_cpu(_vq->vdev, desc[i].next); @@ -232,14 +357,33 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); + if (indirect) { + /* Now that the indirect table is filled in, map it. */ + dma_addr_t addr = vring_map_single( + vq, desc, total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) + goto unmap_release; + + vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); + vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr); + + vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); + } + + /* We're using some buffers from the free list. */ + vq->vq.num_free -= descs_used; + /* Update free pointer */ if (indirect) vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); else vq->free_head = i; - /* Set token. */ - vq->data[head] = data; + /* Store token and indirect buffer state. */ + vq->desc_state[head].data = data; + if (indirect) + vq->desc_state[head].indir_desc = desc; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -262,6 +406,24 @@ static inline int virtqueue_add(struct virtqueue *_vq, virtqueue_kick(_vq); return 0; + +unmap_release: + err_idx = i; + i = head; + + for (n = 0; n < total_sg; n++) { + if (i == err_idx) + break; + vring_unmap_one(vq, &desc[i]); + i = vq->vring.desc[i].next; + } + + vq->vq.num_free += total_sg; + + if (indirect) + kfree(desc); + + return -EIO; } /** @@ -432,27 +594,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick); static void detach_buf(struct vring_virtqueue *vq, unsigned int head) { - unsigned int i; + unsigned int i, j; + u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. */ - vq->data[head] = NULL; + vq->desc_state[head].data = NULL; - /* Put back on free list: find end */ + /* Put back on free list: unmap first-level descriptors and find end */ i = head; - /* Free the indirect table */ - if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)) - kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr))); - - while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) { + while (vq->vring.desc[i].flags & nextflag) { + vring_unmap_one(vq, &vq->vring.desc[i]); i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); vq->vq.num_free++; } + vring_unmap_one(vq, &vq->vring.desc[i]); vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); vq->free_head = head; + /* Plus final descriptor */ vq->vq.num_free++; + + /* Free the indirect table, if any, now that it's unmapped. */ + if (vq->desc_state[head].indir_desc) { + struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; + u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); + + BUG_ON(!(vq->vring.desc[head].flags & + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); + + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one(vq, &indir_desc[j]); + + kfree(vq->desc_state[head].indir_desc); + vq->desc_state[head].indir_desc = NULL; + } } static inline bool more_used(const struct vring_virtqueue *vq) @@ -507,13 +685,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) BAD_RING(vq, "id %u out of range\n", i); return NULL; } - if (unlikely(!vq->data[i])) { + if (unlikely(!vq->desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf clears data, so grab it now. */ - ret = vq->data[i]; + ret = vq->desc_state[i].data; detach_buf(vq, i); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host @@ -687,10 +865,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) START_USE(vq); for (i = 0; i < vq->vring.num; i++) { - if (!vq->data[i]) + if (!vq->desc_state[i].data) continue; /* detach_buf clears data, so grab it now. */ - buf = vq->data[i]; + buf = vq->desc_state[i].data; detach_buf(vq, i); vq->avail_idx_shadow--; vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); @@ -744,7 +922,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, return NULL; } - vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), + GFP_KERNEL); if (!vq) return NULL; @@ -779,11 +958,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < num-1; i++) { + for (i = 0; i < num-1; i++) vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); - vq->data[i] = NULL; - } - vq->data[i] = NULL; + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); return &vq->vq; } @@ -809,6 +986,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; + case VIRTIO_F_IOMMU_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 5063c5e796b7..84a1fab0dd6b 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/fs/9p/cache.c b/fs/9p/cache.c index a69260f27555..103ca5e1267b 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -243,14 +243,14 @@ void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) if (!v9inode->fscache) return; - spin_lock(&v9inode->fscache_lock); + mutex_lock(&v9inode->fscache_lock); if ((filp->f_flags & O_ACCMODE) != O_RDONLY) v9fs_cache_inode_flush_cookie(inode); else v9fs_cache_inode_get_cookie(inode); - spin_unlock(&v9inode->fscache_lock); + mutex_unlock(&v9inode->fscache_lock); } void v9fs_cache_inode_reset_cookie(struct inode *inode) @@ -264,7 +264,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) old = v9inode->fscache; - spin_lock(&v9inode->fscache_lock); + mutex_lock(&v9inode->fscache_lock); fscache_relinquish_cookie(v9inode->fscache, 1); v9ses = v9fs_inode2v9ses(inode); @@ -274,7 +274,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", inode, old, v9inode->fscache); - spin_unlock(&v9inode->fscache_lock); + mutex_unlock(&v9inode->fscache_lock); } int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6caca025019d..1e9bb8db7b48 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -59,6 +59,8 @@ enum { Opt_cache_loose, Opt_fscache, Opt_mmap, /* Access options */ Opt_access, Opt_posixacl, + /* Lock timeout option */ + Opt_locktimeout, /* Error token */ Opt_err }; @@ -78,6 +80,7 @@ static const match_table_t tokens = { {Opt_cachetag, "cachetag=%s"}, {Opt_access, "access=%s"}, {Opt_posixacl, "posixacl"}, + {Opt_locktimeout, "locktimeout=%u"}, {Opt_err, NULL} }; @@ -126,6 +129,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #ifdef CONFIG_9P_FSCACHE v9ses->cachetag = NULL; #endif + v9ses->session_lock_timeout = P9_LOCK_TIMEOUT; if (!opts) return 0; @@ -298,6 +302,23 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #endif break; + case Opt_locktimeout: + r = match_int(&args[0], &option); + if (r < 0) { + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + if (option < 1) { + p9_debug(P9_DEBUG_ERROR, + "locktimeout must be a greater than zero integer.\n"); + ret = -EINVAL; + continue; + } + v9ses->session_lock_timeout = (long)option * HZ; + break; + default: continue; } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 0923f2cf3c80..3775f275ede3 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -116,6 +116,7 @@ struct v9fs_session_info { struct list_head slist; /* list of sessions registered with v9fs */ struct backing_dev_info bdi; struct rw_semaphore rename_sem; + long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ @@ -123,7 +124,7 @@ struct v9fs_session_info { struct v9fs_inode { #ifdef CONFIG_9P_FSCACHE - spinlock_t fscache_lock; + struct mutex fscache_lock; struct fscache_cookie *fscache; #endif struct p9_qid qid; diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 5a0db6dec8d1..aaee1e6584e6 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -40,6 +40,9 @@ */ #define P9_LOCK_TIMEOUT (30*HZ) +/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */ +#define V9FS_STAT2INODE_KEEP_ISIZE 1 + extern struct file_system_type v9fs_fs_type; extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; @@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t); void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); -void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); -void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); +void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, + struct super_block *sb, unsigned int flags); +void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, + unsigned int flags); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); @@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode) } int v9fs_open_to_dotl_flags(int flags); + +static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size) +{ + /* + * 32-bit need the lock, concurrent updates could break the + * sequences and make i_size_read() loop forever. + * 64-bit updates are atomic and can skip the locking. + */ + if (sizeof(i_size) > sizeof(long)) + spin_lock(&inode->i_lock); + i_size_write(inode, i_size); + if (sizeof(i_size) > sizeof(long)) + spin_unlock(&inode->i_lock); +} #endif diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 7d889f56b8e7..05769219d2c2 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -105,7 +105,6 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int err = 0; struct p9_fid *fid; int buflen; - int reclen = 0; struct p9_rdir *rdir; struct kvec kvec; @@ -138,11 +137,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); - if (err) { + if (err <= 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } - reclen = st.size+2; over = !dir_emit(ctx, st.name, strlen(st.name), v9fs_qid2ino(&st.qid), dt_type(&st)); @@ -150,8 +148,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (over) return 0; - rdir->head += reclen; - ctx->pos += reclen; + rdir->head += err; + ctx->pos += err; } } } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index e7b3d2c4472d..373cc50544e9 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -154,6 +154,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) uint8_t status = P9_LOCK_ERROR; int res = 0; unsigned char fl_type; + struct v9fs_session_info *v9ses; fid = filp->private_data; BUG_ON(fid == NULL); @@ -189,6 +190,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) if (IS_SETLKW(cmd)) flock.flags = P9_LOCK_FLAGS_BLOCK; + v9ses = v9fs_inode2v9ses(file_inode(filp)); + /* * if its a blocked request and we get P9_LOCK_BLOCKED as the status * for lock request, keep on trying @@ -202,7 +205,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) break; - if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) + if (schedule_timeout_interruptible(v9ses->session_lock_timeout) + != 0) break; /* * p9_client_lock_dotl overwrites flock.client_id with the @@ -442,7 +446,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) i_size = i_size_read(inode); if (iocb->ki_pos > i_size) { inode_add_bytes(inode, iocb->ki_pos - i_size); - i_size_write(inode, iocb->ki_pos); + /* + * Need to serialize against i_size_write() in + * v9fs_stat2inode() + */ + v9fs_i_size_write(inode, iocb->ki_pos); } return retval; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 73f1d1b3a51c..2de1505aedfd 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -244,7 +244,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) return NULL; #ifdef CONFIG_9P_FSCACHE v9inode->fscache = NULL; - spin_lock_init(&v9inode->fscache_lock); + mutex_init(&v9inode->fscache_lock); #endif v9inode->writeback_fid = NULL; v9inode->cache_validity = 0; @@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, if (retval) goto error; - v9fs_stat2inode(st, inode, sb); + v9fs_stat2inode(st, inode, sb, 0); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; @@ -1074,7 +1074,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, if (IS_ERR(st)) return PTR_ERR(st); - v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb); + v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb, 0); generic_fillattr(d_inode(dentry), stat); p9stat_free(st); @@ -1152,12 +1152,13 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, - struct super_block *sb) + struct super_block *sb, unsigned int flags) { umode_t mode; char ext[32]; @@ -1198,10 +1199,11 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, mode = p9mode2perm(v9ses, stat); mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - i_size_write(inode, stat->length); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) + v9fs_i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ - inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; + inode->i_blocks = (stat->length + 512 - 1) >> 9; v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } @@ -1389,9 +1391,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { int umode; dev_t rdev; - loff_t i_size; struct p9_wstat *st; struct v9fs_session_info *v9ses; + unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_stat(fid); @@ -1404,16 +1406,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) goto out; - spin_lock(&inode->i_lock); /* * We don't want to refresh inode->i_size, * because we may have cached data */ - i_size = inode->i_size; - v9fs_stat2inode(st, inode, inode->i_sb); - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) - inode->i_size = i_size; - spin_unlock(&inode->i_lock); + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? + V9FS_STAT2INODE_KEEP_ISIZE : 0; + v9fs_stat2inode(st, inode, inode->i_sb, flags); out: p9stat_free(st); kfree(st); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0b88744c6446..7ae67fcca031 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, if (retval) goto error; - v9fs_stat2inode_dotl(st, inode); + v9fs_stat2inode_dotl(st, inode, 0); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) @@ -498,7 +498,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, if (IS_ERR(st)) return PTR_ERR(st); - v9fs_stat2inode_dotl(st, d_inode(dentry)); + v9fs_stat2inode_dotl(st, d_inode(dentry), 0); generic_fillattr(d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; @@ -609,11 +609,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate + * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void -v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) +v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, + unsigned int flags) { umode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); @@ -633,7 +635,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - i_size_write(inode, stat->st_size); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) + v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; } else { if (stat->st_result_mask & P9_STATS_ATIME) { @@ -663,8 +666,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) } if (stat->st_result_mask & P9_STATS_RDEV) inode->i_rdev = new_decode_dev(stat->st_rdev); - if (stat->st_result_mask & P9_STATS_SIZE) - i_size_write(inode, stat->st_size); + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && + stat->st_result_mask & P9_STATS_SIZE) + v9fs_i_size_write(inode, stat->st_size); if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } @@ -926,9 +930,9 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie) int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { - loff_t i_size; struct p9_stat_dotl *st; struct v9fs_session_info *v9ses; + unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_getattr_dotl(fid, P9_STATS_ALL); @@ -940,16 +944,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) goto out; - spin_lock(&inode->i_lock); /* * We don't want to refresh inode->i_size, * because we may have cached data */ - i_size = inode->i_size; - v9fs_stat2inode_dotl(st, inode); - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) - inode->i_size = i_size; - spin_unlock(&inode->i_lock); + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? + V9FS_STAT2INODE_KEEP_ISIZE : 0; + v9fs_stat2inode_dotl(st, inode, flags); out: kfree(st); return 0; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index bf495cedec26..ccf935d9e722 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -165,7 +165,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto release_sb; } d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode_dotl(st, d_inode(root)); + v9fs_stat2inode_dotl(st, d_inode(root), 0); kfree(st); } else { struct p9_wstat *st = NULL; @@ -176,7 +176,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, } d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode(st, d_inode(root), sb); + v9fs_stat2inode(st, d_inode(root), sb, 0); p9stat_free(st); kfree(st); diff --git a/fs/aio.c b/fs/aio.c index c283eb03cb38..7187d03aa0bc 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1063,6 +1064,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; + id = array_index_nospec(id, table->nr); ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { if (percpu_ref_tryget_live(&ctx->users)) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 7a5a598a2d94..0d8b9c4f27f2 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -560,7 +560,6 @@ int autofs4_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry); if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) ret = -EFAULT; @@ -573,6 +572,8 @@ int autofs4_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); + dput(dentry); + return ret; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 1132fe71b312..0fd472d67029 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -255,8 +255,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs4_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL; root->d_fsdata = ino; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 598343f07569..91abe314a771 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -854,6 +854,7 @@ static int load_elf_binary(struct linux_binprm *bprm) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); + install_exec_creds(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ @@ -1088,7 +1089,6 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6d1d0b93b1aa..c792df826e12 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o hash.o + uuid-tree.o props.o hash.o tree-checker.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 38ee08675468..8f4baa3cb992 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1726,20 +1726,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, return err; } -/* - * The leaf data grows from end-to-front in the node. - * this returns the address of the start of the last item, - * which is the stop of the leaf data stack - */ -static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - u32 nr = btrfs_header_nritems(leaf); - if (nr == 0) - return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset_nr(leaf, nr - 1); -} - /* * search for key in the extent_buffer. The items start at offset p, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e847573c6db0..4a91d3119e59 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,6 +35,7 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -897,6 +898,7 @@ struct btrfs_balance_item { #define BTRFS_FILE_EXTENT_INLINE 0 #define BTRFS_FILE_EXTENT_REG 1 #define BTRFS_FILE_EXTENT_PREALLOC 2 +#define BTRFS_FILE_EXTENT_TYPES 2 struct btrfs_file_extent_item { /* @@ -2283,7 +2285,7 @@ do { \ #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) struct btrfs_map_token { - struct extent_buffer *eb; + const struct extent_buffer *eb; char *kaddr; unsigned long offset; }; @@ -2314,18 +2316,19 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token) sizeof(((type *)0)->member))) #define DECLARE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ - unsigned long off, \ - struct btrfs_map_token *token); \ -void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \ +u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ + const void *ptr, unsigned long off, \ + struct btrfs_map_token *token); \ +void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \ unsigned long off, u##bits val, \ struct btrfs_map_token *token); \ -static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \ +static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ + const void *ptr, \ unsigned long off) \ { \ return btrfs_get_token_##bits(eb, ptr, off, NULL); \ } \ -static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ +static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\ unsigned long off, u##bits val) \ { \ btrfs_set_token_##bits(eb, ptr, off, val, NULL); \ @@ -2337,7 +2340,8 @@ DECLARE_BTRFS_SETGET_BITS(32) DECLARE_BTRFS_SETGET_BITS(64) #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ + const type *s) \ { \ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ return btrfs_get_##bits(eb, s, offsetof(type, member)); \ @@ -2348,7 +2352,8 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ btrfs_set_##bits(eb, s, offsetof(type, member), val); \ } \ -static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \ +static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\ + const type *s, \ struct btrfs_map_token *token) \ { \ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ @@ -2363,9 +2368,9 @@ static inline void btrfs_set_token_##name(struct extent_buffer *eb, \ } #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ { \ - type *p = page_address(eb->pages[0]); \ + const type *p = page_address(eb->pages[0]); \ u##bits res = le##bits##_to_cpu(p->member); \ return res; \ } \ @@ -2377,7 +2382,7 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(type *s) \ +static inline u##bits btrfs_##name(const type *s) \ { \ return le##bits##_to_cpu(s->member); \ } \ @@ -2678,7 +2683,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr) sizeof(struct btrfs_key_ptr) * nr; } -void btrfs_node_key(struct extent_buffer *eb, +void btrfs_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr); static inline void btrfs_set_node_key(struct extent_buffer *eb, @@ -2707,28 +2712,28 @@ static inline struct btrfs_item *btrfs_item_nr(int nr) return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline u32 btrfs_item_end(struct extent_buffer *eb, +static inline u32 btrfs_item_end(const struct extent_buffer *eb, struct btrfs_item *item) { return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) +static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr) { return btrfs_item_end(eb, btrfs_item_nr(nr)); } -static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) +static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr) { return btrfs_item_offset(eb, btrfs_item_nr(nr)); } -static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) +static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr) { return btrfs_item_size(eb, btrfs_item_nr(nr)); } -static inline void btrfs_item_key(struct extent_buffer *eb, +static inline void btrfs_item_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { struct btrfs_item *item = btrfs_item_nr(nr); @@ -2764,8 +2769,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); -static inline void btrfs_dir_item_key(struct extent_buffer *eb, - struct btrfs_dir_item *item, +static inline void btrfs_dir_item_key(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, struct btrfs_disk_key *key) { read_eb_member(eb, item, struct btrfs_dir_item, location, key); @@ -2773,7 +2778,7 @@ static inline void btrfs_dir_item_key(struct extent_buffer *eb, static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, - struct btrfs_disk_key *key) + const struct btrfs_disk_key *key) { write_eb_member(eb, item, struct btrfs_dir_item, location, key); } @@ -2785,8 +2790,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, generation, 64); -static inline void btrfs_free_space_key(struct extent_buffer *eb, - struct btrfs_free_space_header *h, +static inline void btrfs_free_space_key(const struct extent_buffer *eb, + const struct btrfs_free_space_header *h, struct btrfs_disk_key *key) { read_eb_member(eb, h, struct btrfs_free_space_header, location, key); @@ -2794,7 +2799,7 @@ static inline void btrfs_free_space_key(struct extent_buffer *eb, static inline void btrfs_set_free_space_key(struct extent_buffer *eb, struct btrfs_free_space_header *h, - struct btrfs_disk_key *key) + const struct btrfs_disk_key *key) { write_eb_member(eb, h, struct btrfs_free_space_header, location, key); } @@ -2821,25 +2826,25 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, disk->objectid = cpu_to_le64(cpu->objectid); } -static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, - struct btrfs_key *key, int nr) +static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *key, int nr) { struct btrfs_disk_key disk_key; btrfs_node_key(eb, &disk_key, nr); btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, - struct btrfs_key *key, int nr) +static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *key, int nr) { struct btrfs_disk_key disk_key; btrfs_item_key(eb, &disk_key, nr); btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, - struct btrfs_dir_item *item, - struct btrfs_key *key) +static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, + struct btrfs_key *key) { struct btrfs_disk_key disk_key; btrfs_dir_item_key(eb, item, &disk_key); @@ -2872,7 +2877,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems, 32); BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); -static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) +static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) { return (btrfs_header_flags(eb) & flag) == flag; } @@ -2891,7 +2896,7 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) return (flags & flag) == flag; } -static inline int btrfs_header_backref_rev(struct extent_buffer *eb) +static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) { u64 flags = btrfs_header_flags(eb); return flags >> BTRFS_BACKREF_REV_SHIFT; @@ -2911,12 +2916,12 @@ static inline unsigned long btrfs_header_fsid(void) return offsetof(struct btrfs_header, fsid); } -static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb) { return offsetof(struct btrfs_header, chunk_tree_uuid); } -static inline int btrfs_is_leaf(struct extent_buffer *eb) +static inline int btrfs_is_leaf(const struct extent_buffer *eb) { return btrfs_header_level(eb) == 0; } @@ -2950,12 +2955,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, rtransid, 64); -static inline bool btrfs_root_readonly(struct btrfs_root *root) +static inline bool btrfs_root_readonly(const struct btrfs_root *root) { return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; } -static inline bool btrfs_root_dead(struct btrfs_root *root) +static inline bool btrfs_root_dead(const struct btrfs_root *root) { return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } @@ -3012,51 +3017,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, /* struct btrfs_balance_item */ BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); -static inline void btrfs_balance_data(struct extent_buffer *eb, - struct btrfs_balance_item *bi, +static inline void btrfs_balance_data(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, struct btrfs_disk_balance_args *ba) { read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); } static inline void btrfs_set_balance_data(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) { write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); } -static inline void btrfs_balance_meta(struct extent_buffer *eb, - struct btrfs_balance_item *bi, +static inline void btrfs_balance_meta(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, struct btrfs_disk_balance_args *ba) { read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); } static inline void btrfs_set_balance_meta(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) { write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); } -static inline void btrfs_balance_sys(struct extent_buffer *eb, - struct btrfs_balance_item *bi, +static inline void btrfs_balance_sys(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, struct btrfs_disk_balance_args *ba) { read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); } static inline void btrfs_set_balance_sys(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) { write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); } static inline void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, - struct btrfs_disk_balance_args *disk) + const struct btrfs_disk_balance_args *disk) { memset(cpu, 0, sizeof(*cpu)); @@ -3076,7 +3081,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, static inline void btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, - struct btrfs_balance_args *cpu) + const struct btrfs_balance_args *cpu) { memset(disk, 0, sizeof(*disk)); @@ -3144,7 +3149,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, uuid_tree_generation, 64); -static inline int btrfs_super_csum_size(struct btrfs_super_block *s) +static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) { u16 t = btrfs_super_csum_type(s); /* @@ -3158,6 +3163,21 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) return offsetof(struct btrfs_leaf, items); } +/* + * The leaf data grows from end-to-front in the node. + * this returns the address of the start of the last item, + * which is the stop of the leaf data stack + */ +static inline unsigned int leaf_data_end(const struct btrfs_root *root, + const struct extent_buffer *leaf) +{ + u32 nr = btrfs_header_nritems(leaf); + + if (nr == 0) + return BTRFS_LEAF_DATA_SIZE(root); + return btrfs_item_offset_nr(leaf, nr - 1); +} + /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, @@ -3174,7 +3194,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item, compression, 8); static inline unsigned long -btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) +btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e) { return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; } @@ -3208,8 +3228,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, * size of any extent headers. If a file is compressed on disk, this is * the compressed size */ -static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, - struct btrfs_item *e) +static inline u32 btrfs_file_extent_inline_item_len( + const struct extent_buffer *eb, + struct btrfs_item *e) { return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; } @@ -3217,9 +3238,9 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, /* this returns the number of file bytes represented by the inline item. * If an item is compressed, this is the uncompressed size */ -static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, - int slot, - struct btrfs_file_extent_item *fi) +static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, + int slot, + const struct btrfs_file_extent_item *fi) { struct btrfs_map_token token; @@ -3241,8 +3262,8 @@ static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, /* btrfs_dev_stats_item */ -static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, - struct btrfs_dev_stats_item *ptr, +static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, + const struct btrfs_dev_stats_item *ptr, int index) { u64 val; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 176a27bc63aa..81e5bc62e8e3 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -620,7 +620,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( em = lookup_extent_mapping(em_tree, start, (u64)-1); if (!em) break; - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) if (srcdev == map->stripes[i].dev) map->stripes[i].dev = tgtdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1f21c6c33228..78722aaffecd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -49,6 +49,7 @@ #include "raid56.h" #include "sysfs.h" #include "qgroup.h" +#include "tree-checker.h" #ifdef CONFIG_X86 #include @@ -522,72 +523,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \ - "root=%llu, slot=%d", reason, \ - btrfs_header_bytenr(eb), root->objectid, slot) - -static noinline int check_leaf(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - struct btrfs_key key; - struct btrfs_key leaf_key; - u32 nritems = btrfs_header_nritems(leaf); - int slot; - - if (nritems == 0) - return 0; - - /* Check the 0 item */ - if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(root)) { - CORRUPT("invalid item offset size pair", leaf, root, 0); - return -EIO; - } - - /* - * Check to make sure each items keys are in the correct order and their - * offsets make sense. We only have to loop through nritems-1 because - * we check the current slot against the next slot, which verifies the - * next slot's offset+size makes sense and that the current's slot - * offset is correct. - */ - for (slot = 0; slot < nritems - 1; slot++) { - btrfs_item_key_to_cpu(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &key, slot + 1); - - /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { - CORRUPT("bad key order", leaf, root, slot); - return -EIO; - } - - /* - * Make sure the offset and ends are right, remember that the - * item data starts at the end of the leaf and grows towards the - * front. - */ - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - CORRUPT("slot offset bad", leaf, root, slot); - return -EIO; - } - - /* - * Check to make sure that we don't point outside of the leaf, - * just incase all the items are consistent to eachother, but - * all point outside of the leaf. - */ - if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(root)) { - CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EIO; - } - } - - return 0; -} - static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -654,11 +589,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } + if (found_level > 0 && btrfs_check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: @@ -3958,7 +3896,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, root->fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(root, buf); ASSERT(0); } @@ -4167,6 +4111,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); + + /* + * We need this here because if we've been flipped read-only we won't + * get sync() from the umount, so we need to make sure any ordered + * extents that haven't had their dirty pages IO start writeout yet + * actually get run and error out properly. + */ + btrfs_wait_ordered_roots(fs_info, -1); } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 13ff0fdae03e..978bbfed5a2c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; } - BUG_ON(node->ref_mod != 1); + if (node->ref_mod != 1) { + btrfs_err(root->fs_info, + "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + node->bytenr, node->ref_mod, node->action, ref_root, + parent); + return -EIO; + } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, root, @@ -9481,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9502,7 +9510,47 @@ static int find_first_block_group(struct btrfs_root *root, if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - ret = 0; + struct extent_map_tree *em_tree; + struct extent_map *em; + + em_tree = &root->fs_info->mapping_tree.map_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, found_key.objectid, + found_key.offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(root->fs_info, + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(root->fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; + } else { + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(root->fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } + } + free_extent_map(em); goto out; } path->slots[0]++; @@ -9717,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -9903,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; @@ -10388,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, * more device items and remove one chunk item), but this is done at * btrfs_remove_chunk() through a call to check_system_chunk(). */ - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; num_items = 3 + map->num_stripes; free_extent_map(em); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7de8d545f4d6..315f21191643 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3106,11 +3106,11 @@ static int __do_readpage(struct extent_io_tree *tree, */ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && prev_em_start && *prev_em_start != (u64)-1 && - *prev_em_start != em->orig_start) + *prev_em_start != em->start) force_bio_submit = true; if (prev_em_start) - *prev_em_start = em->orig_start; + *prev_em_start = em->start; free_extent_map(em); em = NULL; @@ -3847,8 +3847,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, struct block_device *bdev = fs_info->fs_devices->latest_bdev; struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; + u32 nritems; unsigned long i, num_pages; unsigned long bio_flags = 0; + unsigned long start, end; int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; int ret = 0; @@ -3858,6 +3860,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) bio_flags = EXTENT_BIO_TREE_LOG; + /* set btree blocks beyond nritems with 0 to avoid stale content. */ + nritems = btrfs_header_nritems(eb); + if (btrfs_header_level(eb) > 0) { + end = btrfs_node_key_ptr_offset(nritems); + + memset_extent_buffer(eb, 0, end, eb->len - end); + } else { + /* + * leaf: + * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 + */ + start = btrfs_item_nr_offset(nritems); + end = btrfs_leaf_data(eb) + + leaf_data_end(fs_info->tree_root, eb); + memset_extent_buffer(eb, 0, start, end - start); + } + for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; @@ -5351,9 +5370,8 @@ unlock_exit: return ret; } -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) +void read_extent_buffer(const struct extent_buffer *eb, void *dstv, + unsigned long start, unsigned long len) { size_t cur; size_t offset; @@ -5382,9 +5400,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } } -int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, - unsigned long start, - unsigned long len) +int read_extent_buffer_to_user(const struct extent_buffer *eb, + void __user *dstv, + unsigned long start, unsigned long len) { size_t cur; size_t offset; @@ -5419,10 +5437,10 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **map, - unsigned long *map_start, - unsigned long *map_len) +int map_private_extent_buffer(const struct extent_buffer *eb, + unsigned long start, unsigned long min_len, + char **map, unsigned long *map_start, + unsigned long *map_len) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; @@ -5457,9 +5475,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, return 0; } -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) +int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len) { size_t cur; size_t offset; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f4c1ae11855f..751435967724 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -308,14 +308,13 @@ static inline void extent_buffer_get(struct extent_buffer *eb) atomic_inc(&eb->refs); } -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, +int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, + unsigned long start, unsigned long len); +void read_extent_buffer(const struct extent_buffer *eb, void *dst, unsigned long start, unsigned long len); -int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, - unsigned long start, +int read_extent_buffer_to_user(const struct extent_buffer *eb, + void __user *dst, unsigned long start, unsigned long len); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); @@ -334,10 +333,10 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_under_io(struct extent_buffer *eb); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **map, - unsigned long *map_start, - unsigned long *map_len); +int map_private_extent_buffer(const struct extent_buffer *eb, + unsigned long offset, unsigned long min_len, + char **map, unsigned long *map_start, + unsigned long *map_len); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 6a98bddd8f33..84fb56d5c018 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em) WARN_ON(extent_map_in_tree(em)); WARN_ON(!list_empty(&em->list)); if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) - kfree(em->bdev); + kfree(em->map_lookup); kmem_cache_free(extent_map_cache, em); } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index b2991fd8583e..eb8b8fae036b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -32,7 +32,15 @@ struct extent_map { u64 block_len; u64 generation; unsigned long flags; - struct block_device *bdev; + union { + struct block_device *bdev; + + /* + * used for chunk mappings + * flags & EXTENT_FLAG_FS_MAPPING must be set + */ + struct map_lookup *map_lookup; + }; atomic_t refs; unsigned int compress_type; struct list_head list; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index b9fa99577bf7..2d2a76906786 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2420,8 +2420,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bitmap_clear(rbio->dbitmap, pagenr, 1); kunmap(p); - for (stripe = 0; stripe < rbio->real_stripes; stripe++) + for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); + kunmap(p_page); } __free_page(p_page); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6dca9f937bf6..cc9ccc42f469 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3460,7 +3460,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, return ret; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; if (em->start != chunk_offset) goto out; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 83c73738165e..40d1ab957fb6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3232,7 +3232,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) kfree(m); } -static void tail_append_pending_moves(struct pending_dir_move *moves, +static void tail_append_pending_moves(struct send_ctx *sctx, + struct pending_dir_move *moves, struct list_head *stack) { if (list_empty(&moves->list)) { @@ -3243,6 +3244,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves, list_add_tail(&moves->list, stack); list_splice_tail(&list, stack); } + if (!RB_EMPTY_NODE(&moves->node)) { + rb_erase(&moves->node, &sctx->pending_dir_moves); + RB_CLEAR_NODE(&moves->node); + } } static int apply_children_dir_moves(struct send_ctx *sctx) @@ -3257,7 +3262,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) return 0; INIT_LIST_HEAD(&stack); - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); while (!list_empty(&stack)) { pm = list_first_entry(&stack, struct pending_dir_move, list); @@ -3268,7 +3273,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) goto out; pm = get_pending_dir_moves(sctx, parent_ino); if (pm) - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); } return 0; diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index b976597b0721..63ffd213b0b7 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8 val, void *p) */ #define DEFINE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ - unsigned long off, \ +u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ + const void *ptr, unsigned long off, \ struct btrfs_map_token *token) \ { \ unsigned long part_offset = (unsigned long)ptr; \ @@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ return res; \ } \ void btrfs_set_token_##bits(struct extent_buffer *eb, \ - void *ptr, unsigned long off, u##bits val, \ + const void *ptr, unsigned long off, \ + u##bits val, \ struct btrfs_map_token *token) \ { \ unsigned long part_offset = (unsigned long)ptr; \ @@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16) DEFINE_BTRFS_SETGET_BITS(32) DEFINE_BTRFS_SETGET_BITS(64) -void btrfs_node_key(struct extent_buffer *eb, +void btrfs_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr = btrfs_node_key_ptr_offset(nr); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c new file mode 100644 index 000000000000..5b98f3c76ce4 --- /dev/null +++ b/fs/btrfs/tree-checker.c @@ -0,0 +1,649 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +/* + * The module is used to catch unexpected/corrupted tree block data. + * Such behavior can be caused either by a fuzzed image or bugs. + * + * The objective is to do leaf/node validation checks when tree block is read + * from disk, and check *every* possible member, so other code won't + * need to checking them again. + * + * Due to the potential and unwanted damage, every checker needs to be + * carefully reviewed otherwise so it does not prevent mount of valid images. + */ + +#include "ctree.h" +#include "tree-checker.h" +#include "disk-io.h" +#include "compression.h" +#include "hash.h" +#include "volumes.h" + +#define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ + "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ + btrfs_header_level(eb) == 0 ? "leaf" : "node", \ + reason, btrfs_header_bytenr(eb), root->objectid, slot) + +/* + * Error message should follow the following format: + * corrupt : , [, ] + * + * @type: leaf or node + * @identifier: the necessary info to locate the leaf/node. + * It's recommened to decode key.objecitd/offset if it's + * meaningful. + * @reason: describe the error + * @bad_value: optional, it's recommened to output bad value and its + * expected value (range). + * + * Since comma is used to separate the components, only space is allowed + * inside each component. + */ + +/* + * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. + * Allows callers to customize the output. + */ +__printf(4, 5) +static void generic_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + root->objectid, btrfs_header_bytenr(eb), slot, &vaf); + va_end(args); +} + +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_file_extent_item *fi; + u32 sectorsize = root->sectorsize; + u32 item_size = btrfs_item_size_nr(leaf, slot); + + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for file extent", + leaf, root, slot); + return -EUCLEAN; + } + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { + CORRUPT("invalid file extent type", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Support for new compression/encrption must introduce incompat flag, + * and must be caught in open_ctree(). + */ + if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { + CORRUPT("invalid file extent compression", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_encryption(leaf, fi)) { + CORRUPT("invalid file extent encryption", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { + /* Inline extent must have 0 as key offset */ + if (key->offset) { + CORRUPT("inline extent has non-zero key offset", + leaf, root, slot); + return -EUCLEAN; + } + + /* Compressed inline extent has no on-disk size, skip it */ + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE) + return 0; + + /* Uncompressed inline extent size must match item size */ + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + + btrfs_file_extent_ram_bytes(leaf, fi)) { + CORRUPT("plaintext inline extent has invalid size", + leaf, root, slot); + return -EUCLEAN; + } + return 0; + } + + /* Regular or preallocated extent has fixed item size */ + if (item_size != sizeof(*fi)) { + CORRUPT( + "regluar or preallocated extent data item size is invalid", + leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { + CORRUPT( + "regular or preallocated extent data item has unaligned value", + leaf, root, slot); + return -EUCLEAN; + } + + return 0; +} + +static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + u32 sectorsize = root->sectorsize; + u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + + if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { + CORRUPT("invalid objectid for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { + CORRUPT("unaligned csum item size", leaf, root, slot); + return -EUCLEAN; + } + return 0; +} + +/* + * Customized reported for dir_item, only important new info is key->objectid, + * which represents inode number + */ +__printf(4, 5) +static void dir_item_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, + btrfs_header_bytenr(eb), slot, key.objectid, &vaf); + va_end(args); +} + +static int check_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dir_item *di; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u32 cur = 0; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + while (cur < item_size) { + u32 name_len; + u32 data_len; + u32 max_name_len; + u32 total_size; + u32 name_hash; + u8 dir_type; + + /* header itself should not cross item boundary */ + if (cur + sizeof(*di) > item_size) { + dir_item_err(root, leaf, slot, + "dir item header crosses item boundary, have %zu boundary %u", + cur + sizeof(*di), item_size); + return -EUCLEAN; + } + + /* dir type check */ + dir_type = btrfs_dir_type(leaf, di); + if (dir_type >= BTRFS_FT_MAX) { + dir_item_err(root, leaf, slot, + "invalid dir item type, have %u expect [0, %u)", + dir_type, BTRFS_FT_MAX); + return -EUCLEAN; + } + + if (key->type == BTRFS_XATTR_ITEM_KEY && + dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "invalid dir item type for XATTR key, have %u expect %u", + dir_type, BTRFS_FT_XATTR); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR && + key->type != BTRFS_XATTR_ITEM_KEY) { + dir_item_err(root, leaf, slot, + "xattr dir type found for non-XATTR key"); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR) + max_name_len = XATTR_NAME_MAX; + else + max_name_len = BTRFS_NAME_LEN; + + /* Name/data length check */ + name_len = btrfs_dir_name_len(leaf, di); + data_len = btrfs_dir_data_len(leaf, di); + if (name_len > max_name_len) { + dir_item_err(root, leaf, slot, + "dir item name len too long, have %u max %u", + name_len, max_name_len); + return -EUCLEAN; + } + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) { + dir_item_err(root, leaf, slot, + "dir item name and data len too long, have %u max %zu", + name_len + data_len, + BTRFS_MAX_XATTR_SIZE(root)); + return -EUCLEAN; + } + + if (data_len && dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "dir item with invalid data len, have %u expect 0", + data_len); + return -EUCLEAN; + } + + total_size = sizeof(*di) + name_len + data_len; + + /* header and name/data should not cross item boundary */ + if (cur + total_size > item_size) { + dir_item_err(root, leaf, slot, + "dir item data crosses item boundary, have %u boundary %u", + cur + total_size, item_size); + return -EUCLEAN; + } + + /* + * Special check for XATTR/DIR_ITEM, as key->offset is name + * hash, should match its name + */ + if (key->type == BTRFS_DIR_ITEM_KEY || + key->type == BTRFS_XATTR_ITEM_KEY) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + + read_extent_buffer(leaf, namebuf, + (unsigned long)(di + 1), name_len); + name_hash = btrfs_name_hash(namebuf, name_len); + if (key->offset != name_hash) { + dir_item_err(root, leaf, slot, + "name hash mismatch with key, have 0x%016x expect 0x%016llx", + name_hash, key->offset); + return -EUCLEAN; + } + } + cur += total_size; + di = (struct btrfs_dir_item *)((void *)di + total_size); + } + return 0; +} + +__printf(4, 5) +__cold +static void block_group_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, key.offset, &vaf); + va_end(args); +} + +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_block_group_item bgi; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u64 flags; + u64 type; + + /* + * Here we don't really care about alignment since extent allocator can + * handle it. We care more about the size, as if one block group is + * larger than maximum size, it's must be some obvious corruption. + */ + if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + block_group_err(fs_info, leaf, slot, + "invalid block group size, have %llu expect (0, %llu]", + key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + return -EUCLEAN; + } + + if (item_size != sizeof(bgi)) { + block_group_err(fs_info, leaf, slot, + "invalid item size, have %u expect %zu", + item_size, sizeof(bgi)); + return -EUCLEAN; + } + + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), + sizeof(bgi)); + if (btrfs_block_group_chunk_objectid(&bgi) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID) { + block_group_err(fs_info, leaf, slot, + "invalid block group chunk objectid, have %llu expect %llu", + btrfs_block_group_chunk_objectid(&bgi), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + + if (btrfs_block_group_used(&bgi) > key->offset) { + block_group_err(fs_info, leaf, slot, + "invalid block group used, have %llu expect [0, %llu)", + btrfs_block_group_used(&bgi), key->offset); + return -EUCLEAN; + } + + flags = btrfs_block_group_flags(&bgi); + if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { + block_group_err(fs_info, leaf, slot, +"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", + flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, + hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); + return -EUCLEAN; + } + + type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + if (type != BTRFS_BLOCK_GROUP_DATA && + type != BTRFS_BLOCK_GROUP_METADATA && + type != BTRFS_BLOCK_GROUP_SYSTEM && + type != (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA)) { + block_group_err(fs_info, leaf, slot, +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", + type, hweight64(type), + BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, + BTRFS_BLOCK_GROUP_SYSTEM, + BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); + return -EUCLEAN; + } + return 0; +} + +/* + * Common point to switch the item-specific validation. + */ +static int check_leaf_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + int ret = 0; + + switch (key->type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + case BTRFS_XATTR_ITEM_KEY: + ret = check_dir_item(root, leaf, key, slot); + break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(root->fs_info, leaf, key, slot); + break; + } + return ret; +} + +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, + bool check_item_data) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ + struct btrfs_key prev_key = {0, 0, 0}; + struct btrfs_key key; + u32 nritems = btrfs_header_nritems(leaf); + int slot; + + if (btrfs_header_level(leaf) != 0) { + generic_err(root, leaf, 0, + "invalid level for leaf, have %d expect 0", + btrfs_header_level(leaf)); + return -EUCLEAN; + } + + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So + * skip this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + u64 owner = btrfs_header_owner(leaf); + struct btrfs_root *check_root; + + /* These trees must never be empty */ + if (owner == BTRFS_ROOT_TREE_OBJECTID || + owner == BTRFS_CHUNK_TREE_OBJECTID || + owner == BTRFS_EXTENT_TREE_OBJECTID || + owner == BTRFS_DEV_TREE_OBJECTID || + owner == BTRFS_FS_TREE_OBJECTID || + owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { + generic_err(root, leaf, 0, + "invalid root, root %llu must never be empty", + owner); + return -EUCLEAN; + } + key.objectid = owner; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); + /* if leaf is the root, then it's fine */ + if (leaf != eb) { + CORRUPT("non-root leaf's nritems is 0", + leaf, check_root, 0); + free_extent_buffer(eb); + return -EUCLEAN; + } + free_extent_buffer(eb); + } + return 0; + } + + if (nritems == 0) + return 0; + + /* + * Check the following things to make sure this is a good leaf, and + * leaf users won't need to bother with similar sanity checks: + * + * 1) key ordering + * 2) item offset and size + * No overlap, no hole, all inside the leaf. + * 3) item content + * If possible, do comprehensive sanity check. + * NOTE: All checks must only rely on the item data itself. + */ + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; + int ret; + + btrfs_item_key_to_cpu(leaf, &key, slot); + + /* Make sure the keys are in the right order */ + if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { + CORRUPT("bad key order", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Make sure the offset and ends are right, remember that the + * item data starts at the end of the leaf and grows towards the + * front. + */ + if (slot == 0) + item_end_expected = BTRFS_LEAF_DATA_SIZE(root); + else + item_end_expected = btrfs_item_offset_nr(leaf, + slot - 1); + if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { + CORRUPT("slot offset bad", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Check to make sure that we don't point outside of the leaf, + * just in case all the items are consistent to each other, but + * all point outside of the leaf. + */ + if (btrfs_item_end_nr(leaf, slot) > + BTRFS_LEAF_DATA_SIZE(root)) { + CORRUPT("slot end outside of leaf", leaf, root, slot); + return -EUCLEAN; + } + + /* Also check if the item pointer overlaps with btrfs item. */ + if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > + btrfs_item_ptr_offset(leaf, slot)) { + CORRUPT("slot overlap with its data", leaf, root, slot); + return -EUCLEAN; + } + + if (check_item_data) { + /* + * Check if the item size and content meet other + * criteria + */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + } + + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; + } + + return 0; +} + +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, false); +} + +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; + int level = btrfs_header_level(node); + u64 bytenr; + int ret = 0; + + if (level <= 0 || level >= BTRFS_MAX_LEVEL) { + generic_err(root, node, 0, + "invalid level for node, have %d expect [1, %d]", + level, BTRFS_MAX_LEVEL - 1); + return -EUCLEAN; + } + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, +"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%zu]", + root->objectid, node->start, + nr == 0 ? "small" : "large", nr, + BTRFS_NODEPTRS_PER_BLOCK(root)); + return -EUCLEAN; + } + + for (slot = 0; slot < nr - 1; slot++) { + bytenr = btrfs_node_blockptr(node, slot); + btrfs_node_key_to_cpu(node, &key, slot); + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { + generic_err(root, node, slot, + "invalid NULL node pointer"); + ret = -EUCLEAN; + goto out; + } + if (!IS_ALIGNED(bytenr, root->sectorsize)) { + generic_err(root, node, slot, + "unaligned pointer, have %llu should be aligned to %u", + bytenr, root->sectorsize); + ret = -EUCLEAN; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { + generic_err(root, node, slot, + "bad key order, current (%llu %u %llu) next (%llu %u %llu)", + key.objectid, key.type, key.offset, + next_key.objectid, next_key.type, + next_key.offset); + ret = -EUCLEAN; + goto out; + } + } +out: + return ret; +} diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h new file mode 100644 index 000000000000..3d53e8d6fda0 --- /dev/null +++ b/fs/btrfs/tree-checker.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +#ifndef __BTRFS_TREE_CHECKER__ +#define __BTRFS_TREE_CHECKER__ + +#include "ctree.h" +#include "extent_io.h" + +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf); +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); + +#endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 63f59f17c97e..c7190f322576 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3321,9 +3321,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - /* find the first key from this transaction again */ + /* + * Find the first key from this transaction again. See the note for + * log_new_dir_dentries, if we're logging a directory recursively we + * won't be holding its i_mutex, which means we can modify the directory + * while we're logging it. If we remove an entry between our first + * search and this search we'll not find the key again and can just + * bail. + */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (WARN_ON(ret != 0)) + if (ret != 0) goto done; /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b4d63a9842fa..d1cca19b29d3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1184,7 +1184,7 @@ again: struct map_lookup *map; int i; - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { u64 end; @@ -2757,7 +2757,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, free_extent_map(em); return -EINVAL; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; lock_chunks(root->fs_info->chunk_root); check_system_chunk(trans, extent_root, map->type); unlock_chunks(root->fs_info->chunk_root); @@ -4540,7 +4540,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = 1024 * 1024 * 1024; - max_chunk_size = 10 * max_stripe_size; + max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { @@ -4731,7 +4731,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, goto error; } set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); - em->bdev = (struct block_device *)map; + em->map_lookup = map; em->start = start; em->len = num_bytes; em->block_start = 0; @@ -4826,7 +4826,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, return -EINVAL; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; item_size = btrfs_chunk_item_size(map->num_stripes); stripe_size = em->orig_block_len; @@ -4968,7 +4968,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) if (!em) return 1; - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { if (map->stripes[i].dev->missing) { miss_ndevs++; @@ -5048,7 +5048,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) return 1; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) ret = map->num_stripes; else if (map->type & BTRFS_BLOCK_GROUP_RAID10) @@ -5091,7 +5091,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) len = map->stripe_len * nr_data_stripes(map); free_extent_map(em); @@ -5112,7 +5112,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ret = 1; free_extent_map(em); @@ -5271,7 +5271,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, return -EINVAL; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; offset = logical - em->start; stripe_len = map->stripe_len; @@ -5813,7 +5813,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, free_extent_map(em); return -EIO; } - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; length = em->len; rmap_len = map->stripe_len; @@ -6208,6 +6208,101 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } +/* Return -EIO if any error, otherwise return 0. */ +static int btrfs_check_chunk_valid(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + u64 features; + bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (!num_stripes) { + btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", + num_stripes); + return -EIO; + } + if (!IS_ALIGNED(logical, root->sectorsize)) { + btrfs_err(root->fs_info, + "invalid chunk logical %llu", logical); + return -EIO; + } + if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) { + btrfs_err(root->fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } + if (!length || !IS_ALIGNED(length, root->sectorsize)) { + btrfs_err(root->fs_info, + "invalid chunk length %llu", length); + return -EIO; + } + if (!is_power_of_2(stripe_len)) { + btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", + stripe_len); + return -EIO; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + type) { + btrfs_err(root->fs_info, "unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(root->fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(root->fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(root->fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + btrfs_err(root->fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -6217,6 +6312,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_map *em; u64 logical; u64 length; + u64 stripe_len; u64 devid; u8 uuid[BTRFS_UUID_SIZE]; int num_stripes; @@ -6225,6 +6321,12 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + ret = btrfs_check_chunk_valid(root, leaf, chunk, logical); + if (ret) + return ret; read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); @@ -6241,7 +6343,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em = alloc_extent_map(); if (!em) return -ENOMEM; - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { free_extent_map(em); @@ -6249,7 +6350,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, } set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); - em->bdev = (struct block_device *)map; + em->map_lookup = map; em->start = logical; em->len = length; em->orig_start = 0; @@ -6473,6 +6574,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 array_size; u32 len = 0; u32 cur_offset; + u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); @@ -6539,6 +6641,15 @@ int btrfs_read_sys_array(struct btrfs_root *root) break; } + type = btrfs_chunk_type(sb, chunk); + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + btrfs_err(root->fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; @@ -6948,7 +7059,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, /* In order to kick the device replace finish process */ lock_chunks(root); list_for_each_entry(em, &transaction->pending_chunks, list) { - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { dev = map->stripes[i].dev; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d5c84f6b1353..3c651df420be 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ #include #include "async-thread.h" +#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) + extern struct mutex uuid_mutex; #define BTRFS_STRIPE_LEN (64 * 1024) diff --git a/fs/buffer.c b/fs/buffer.c index 6f7d519a093b..f278e27bd8c0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2985,6 +2985,13 @@ void guard_bio_eod(int rw, struct bio *bio) /* Uhhuh. We've got a bio that straddles the device size! */ truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); + /* + * The bio contains more than one segment which spans EOD, just return + * and let IO layer turn it into an EIO + */ + if (truncated_bytes > bvec->bv_len) + return; + /* Truncate the bio.. */ bio->bi_iter.bi_size -= truncated_bytes; bvec->bv_len -= truncated_bytes; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5b68cf526887..c05ab2ec0fef 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -963,11 +963,8 @@ error: void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) { struct cachefiles_object *object; - struct cachefiles_cache *cache; object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); _enter("%p,{%lu}", object, page->index); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0e3de1bb6500..e7b54514d99a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3243,7 +3243,6 @@ retry: tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; - tcap->mseq = t_mseq; tcap->issued |= issued; tcap->implemented |= issued; if (cap == ci->i_auth_cap) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4aa7122a8d38..a485d0cdc559 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -611,7 +611,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->size); spin_lock(&mdsc->snap_flush_lock); - list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); + if (list_empty(&ci->i_snap_flush_item)) + list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); spin_unlock(&mdsc->snap_flush_lock); return 1; /* caller may want to ceph_flush_snaps */ } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 7dc886c9a78f..1ea643faf04b 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -266,9 +266,9 @@ static void dump_referral(const struct dfs_info3_param *ref) { cifs_dbg(FYI, "DFS: ref path: %s\n", ref->path_name); cifs_dbg(FYI, "DFS: node path: %s\n", ref->node_name); - cifs_dbg(FYI, "DFS: fl: %hd, srv_type: %hd\n", + cifs_dbg(FYI, "DFS: fl: %d, srv_type: %d\n", ref->flags, ref->server_type); - cifs_dbg(FYI, "DFS: ref_flags: %hd, path_consumed: %hd\n", + cifs_dbg(FYI, "DFS: ref_flags: %d, path_consumed: %d\n", ref->ref_flag, ref->path_consumed); } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1eeb4780c3ed..9cb72fd40eff 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -48,6 +48,7 @@ #include "cifs_unicode.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "dns_resolve.h" #include "ntlmssp.h" #include "nterr.h" #include "rfc1002pdu.h" @@ -303,6 +304,53 @@ static void cifs_prune_tlinks(struct work_struct *work); static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, const char *devname); +/* + * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may + * get their ip addresses changed at some point. + * + * This should be called with server->srv_mutex held. + */ +#ifdef CONFIG_CIFS_DFS_UPCALL +static int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + int rc; + int len; + char *unc, *ipaddr = NULL; + + if (!server->hostname) + return -EINVAL; + + len = strlen(server->hostname) + 3; + + unc = kmalloc(len, GFP_KERNEL); + if (!unc) { + cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__); + return -ENOMEM; + } + snprintf(unc, len, "\\\\%s", server->hostname); + + rc = dns_resolve_server_name_to_ip(unc, &ipaddr); + kfree(unc); + + if (rc < 0) { + cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", + __func__, server->hostname, rc); + return rc; + } + + rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, + strlen(ipaddr)); + kfree(ipaddr); + + return !rc ? -1 : 0; +} +#else +static inline int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + return 0; +} +#endif + /* * cifs tcp session reconnection * @@ -400,6 +448,11 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); + rc = reconn_set_ipaddr(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } mutex_unlock(&server->srv_mutex); msleep(3000); } else { @@ -1202,6 +1255,11 @@ cifs_parse_devname(const char *devname, struct smb_vol *vol) const char *delims = "/\\"; size_t len; + if (unlikely(!devname || !*devname)) { + cifs_dbg(VFS, "Device name not specified.\n"); + return -EINVAL; + } + /* make sure we have a valid UNC double delimiter prefix */ len = strspn(devname, delims); if (len != 2) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0141aba9eca6..23a8374fa97f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1073,14 +1073,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { free_xid(xid); return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); @@ -1404,12 +1408,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); @@ -1566,8 +1574,20 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX && !rc) + if (flock->fl_flags & FL_POSIX) { + /* + * If this is a request to remove all locks because we + * are closing the file, it doesn't matter if the + * unlocking failed as both cifs.ko and the SMB server + * remove the lock on file close + */ + if (rc) { + cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); + if (!(flock->fl_flags & FL_CLOSE)) + return rc; + } rc = locks_lock_file_wait(file, flock); + } return rc; } @@ -2745,14 +2765,16 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) * these pages but not on the region from pos to ppos+len-1. */ written = cifs_user_writev(iocb, from); - if (written > 0 && CIFS_CACHE_READ(cinode)) { + if (CIFS_CACHE_READ(cinode)) { /* - * Windows 7 server can delay breaking level2 oplock if a write - * request comes - break it on the client to prevent reading - * an old data. + * We have read level caching and we have just sent a write + * request to the server thus making data in the cache stale. + * Zap the cache and set oplock/lease level to NONE to avoid + * reading stale data from the cache. All subsequent read + * operations will read new data from the server. */ cifs_zap_mapping(inode); - cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", + cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", inode); cinode->oplock = 0; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5c3187df9ab9..d8bd8dd36211 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -759,43 +759,50 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if ((rc == -EACCES) && backup_cred(cifs_sb) && (strcmp(server->vals->version_string, SMB1_VERSION_STRING) == 0)) { - /* - * For SMB2 and later the backup intent flag is already - * sent if needed on open and there is no path based - * FindFirst operation to use to retry with - */ + /* + * For SMB2 and later the backup intent flag is already + * sent if needed on open and there is no path based + * FindFirst operation to use to retry with + */ - srchinf = kzalloc(sizeof(struct cifs_search_info), - GFP_KERNEL); - if (srchinf == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } + srchinf = kzalloc(sizeof(struct cifs_search_info), + GFP_KERNEL); + if (srchinf == NULL) { + rc = -ENOMEM; + goto cgii_exit; + } - srchinf->endOfSearch = false; + srchinf->endOfSearch = false; + if (tcon->unix_ext) + srchinf->info_level = SMB_FIND_FILE_UNIX; + else if ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) + srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD; + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; + else /* no srvino useful for fallback to some netapp */ + srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO; - srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | - CIFS_SEARCH_CLOSE_AT_END | - CIFS_SEARCH_BACKUP_SEARCH; + srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | + CIFS_SEARCH_CLOSE_AT_END | + CIFS_SEARCH_BACKUP_SEARCH; - rc = CIFSFindFirst(xid, tcon, full_path, - cifs_sb, NULL, srchflgs, srchinf, false); - if (!rc) { - data = - (FILE_ALL_INFO *)srchinf->srch_entries_start; + rc = CIFSFindFirst(xid, tcon, full_path, + cifs_sb, NULL, srchflgs, srchinf, false); + if (!rc) { + data = (FILE_ALL_INFO *)srchinf->srch_entries_start; - cifs_dir_info_to_fattr(&fattr, - (FILE_DIRECTORY_INFO *)data, cifs_sb); - fattr.cf_uniqueid = le64_to_cpu( - ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); - validinum = true; + cifs_dir_info_to_fattr(&fattr, + (FILE_DIRECTORY_INFO *)data, cifs_sb); + fattr.cf_uniqueid = le64_to_cpu( + ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); + validinum = true; - cifs_buf_release(srchinf->ntwrk_buf_start); - } - kfree(srchinf); - if (rc) - goto cgii_exit; + cifs_buf_release(srchinf->ntwrk_buf_start); + } + kfree(srchinf); + if (rc) + goto cgii_exit; } else goto cgii_exit; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 57b039ebfb1f..43fa471c88d7 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -652,7 +652,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* scan and find it */ int i; char *cur_ent; - char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + + char *end_of_smb; + + if (cfile->srch_inf.ntwrk_buf_start == NULL) { + cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n"); + return -EIO; + } + + end_of_smb = cfile->srch_inf.ntwrk_buf_start + server->ops->calc_smb_size( cfile->srch_inf.ntwrk_buf_start); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index efd72e1fae74..f7a9adab0b84 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -305,7 +305,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) remaining = tgt_total_cnt - total_in_tgt; if (remaining < 0) { - cifs_dbg(FYI, "Server sent too much data. tgt_total_cnt=%hu total_in_tgt=%hu\n", + cifs_dbg(FYI, "Server sent too much data. tgt_total_cnt=%hu total_in_tgt=%u\n", tgt_total_cnt, total_in_tgt); return -EPROTO; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b2aff0c6f22c..dee5250701de 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -123,12 +123,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < sizeof(struct smb2_lock_element)) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) @@ -265,6 +267,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) { diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 8257a5a97cc0..7e93d5706bf6 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -377,8 +377,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"}, {STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"}, {STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"}, - {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"}, - {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"}, + {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"}, + {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"}, {STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"}, {STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS, "STATUS_CTL_FILE_NOT_SUPPORTED"}, @@ -1034,7 +1034,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 2725085a3f9f..eae3cdffaf7f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -143,14 +143,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, scredits = server->credits; /* can deadlock with reopen */ - if (scredits == 1) { + if (scredits <= 8) { *num = SMB2_MAX_BUFFER_SIZE; *credits = 0; break; } - /* leave one credit for a possible reopen */ - scredits--; + /* leave some credits for reopen and other ops */ + scredits -= 8; *num = min_t(unsigned int, size, scredits * SMB2_MAX_BUFFER_SIZE); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index f7111bb88ec1..5e21d58c49ef 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2523,8 +2523,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) { srch_inf->endOfSearch = true; rc = 0; - } - cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + } else + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index aacb15bd56fe..f087158c5555 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -82,8 +82,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 54af10204e83..1cf0a336ec06 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -360,7 +360,7 @@ uncork: if (rc < 0 && rc != -EINTR) cifs_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else + else if (rc > 0) rc = 0; return rc; diff --git a/fs/dcache.c b/fs/dcache.c index b056cf8271a5..695c09652b36 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1155,15 +1155,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, */ void shrink_dcache_sb(struct super_block *sb) { - long freed; - do { LIST_HEAD(dispose); - freed = list_lru_walk(&sb->s_dentry_lru, + list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); - - this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); cond_resched(); } while (list_lru_count(&sb->s_dentry_lru) > 0); @@ -1514,7 +1510,7 @@ static void check_and_drop(void *_data) { struct detach_data *data = _data; - if (!data->mountpoint && !data->select.found) + if (!data->mountpoint && list_empty(&data->select.dispose)) __d_drop(data->select.start); } @@ -1556,17 +1552,15 @@ void d_invalidate(struct dentry *dentry) d_walk(dentry, &data, detach_and_collect, check_and_drop); - if (data.select.found) + if (!list_empty(&data.select.dispose)) shrink_dentry_list(&data.select.dispose); + else if (!data.mountpoint) + return; if (data.mountpoint) { detach_mounts(data.mountpoint); dput(data.mountpoint); } - - if (!data.mountpoint && !data.select.found) - break; - cond_resched(); } } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 6c2eaf690abf..a8c94b470b01 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -707,6 +707,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; + if (IS_ERR(old_dir)) + return old_dir; + if (IS_ERR(new_dir)) + return new_dir; + if (IS_ERR_OR_NULL(old_dentry)) + return old_dentry; + trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index dcea1e37a1b7..f18619bc2e09 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -290,6 +290,8 @@ void dlm_callback_suspend(struct dlm_ls *ls) flush_workqueue(ls->ls_callback_wq); } +#define MAX_CB_QUEUE 25 + void dlm_callback_resume(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; @@ -300,15 +302,23 @@ void dlm_callback_resume(struct dlm_ls *ls) if (!ls->ls_callback_wq) return; +more: mutex_lock(&ls->ls_cb_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { list_del_init(&lkb->lkb_cb_list); queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); count++; + if (count == MAX_CB_QUEUE) + break; } mutex_unlock(&ls->ls_cb_mutex); if (count) log_rinfo(ls, "dlm_callback_resume %d", count); + if (count == MAX_CB_QUEUE) { + count = 0; + cond_resched(); + goto more; + } } diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 35502d4046f5..3a7f401e943c 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1210,6 +1210,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) if (rv < 0) { log_error(ls, "create_lkb idr error %d", rv); + dlm_free_lkb(lkb); return rv; } @@ -4177,6 +4178,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) (unsigned long long)lkb->lkb_recover_seq, ms->m_header.h_nodeid, ms->m_lkid); error = -ENOENT; + dlm_put_lkb(lkb); goto fail; } @@ -4230,6 +4232,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) lkb->lkb_id, lkb->lkb_remid, ms->m_header.h_nodeid, ms->m_lkid); error = -ENOENT; + dlm_put_lkb(lkb); goto fail; } @@ -5792,20 +5795,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, goto out; } } - - /* After ua is attached to lkb it will be freed by dlm_free_lkb(). - When DLM_IFL_USER is set, the dlm knows that this is a userspace - lock and that lkb_astparam is the dlm_user_args structure. */ - error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, fake_astfn, ua, fake_bastfn, &args); - lkb->lkb_flags |= DLM_IFL_USER; - if (error) { + kfree(ua->lksb.sb_lvbptr); + ua->lksb.sb_lvbptr = NULL; + kfree(ua); __put_lkb(ls, lkb); goto out; } + /* After ua is attached to lkb it will be freed by dlm_free_lkb(). + When DLM_IFL_USER is set, the dlm knows that this is a userspace + lock and that lkb_astparam is the dlm_user_args structure. */ + lkb->lkb_flags |= DLM_IFL_USER; error = request_lock(ls, lkb, name, namelen, &args); switch (error) { diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index f3e72787e7f9..30e4e01db35a 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -673,11 +673,11 @@ static int new_lockspace(const char *name, const char *cluster, kfree(ls->ls_recover_buf); out_lkbidr: idr_destroy(&ls->ls_lkbidr); + out_rsbtbl: for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { if (ls->ls_remove_names[i]) kfree(ls->ls_remove_names[i]); } - out_rsbtbl: vfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b90433..280460fef066 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -20,8 +20,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -29,6 +34,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); + cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 066df649a6b0..ac21caad6729 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1035,7 +1035,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ - if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (ep->ovflist != EP_UNACTIVE_PTR) { if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; diff --git a/fs/exec.c b/fs/exec.c index 7b4cd71c7b2d..ab6e34e1de21 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; + unsigned int gup_flags = FOLL_FORCE; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -199,8 +200,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return NULL; } #endif - ret = get_user_pages(current, bprm->mm, pos, - 1, write, 1, &page, NULL); + + if (write) + gup_flags |= FOLL_WRITE; + + ret = get_user_pages(current, bprm->mm, pos, 1, gup_flags, + &page, NULL); if (ret <= 0) return NULL; @@ -1152,8 +1157,10 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); + if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; /* Ensure mm->user_ns contains the executable */ diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 714cd37a6ba3..6599c6124552 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -76,7 +76,7 @@ static bool dentry_connected(struct dentry *dentry) struct dentry *parent = dget_parent(dentry); dput(dentry); - if (IS_ROOT(dentry)) { + if (dentry == parent) { dput(parent); return false; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 111a31761ffa..7600c98a8f86 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -724,7 +724,8 @@ static loff_t ext2_max_size(int bits) { loff_t res = EXT2_NDIR_BLOCKS; int meta_blocks; - loff_t upper_limit; + unsigned int upper_limit; + unsigned int ppb = 1 << (bits-2); /* This is calculated to be the largest file size for a * dense, file such that the total number of @@ -738,24 +739,34 @@ static loff_t ext2_max_size(int bits) /* total blocks in file system block size */ upper_limit >>= (bits - 9); - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - + /* Compute how many blocks we can address by block tree */ res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); - res <<= bits; - if (res > upper_limit) - res = upper_limit; + /* Does block tree limit file size? */ + if (res < upper_limit) + goto check_lfs; + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT2_NDIR_BLOCKS; + /* indirect blocks */ + meta_blocks = 1; + upper_limit -= ppb; + /* double indirect blocks */ + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) + + DIV_ROUND_UP(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: + res <<= bits; if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index f817ed58f5ad..b40e75dbf48c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -372,7 +372,7 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, { struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_handle_valid(handle)) { + if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index debf0707789d..2e5ae183a18a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -79,7 +79,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - if (pos >= i_size_read(inode)) + if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 355ef9c36c87..08f3a0c0f468 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1323,6 +1323,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_lblk_t offsets[4], offsets2[4]; Indirect chain[4], chain2[4]; Indirect *partial, *partial2; + Indirect *p = NULL, *p2 = NULL; ext4_lblk_t max_block; __le32 nr = 0, nr2 = 0; int n = 0, n2 = 0; @@ -1364,7 +1365,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, } - partial = ext4_find_shared(inode, n, offsets, chain, &nr); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); if (nr) { if (partial == chain) { /* Shared branch grows from the inode */ @@ -1389,13 +1390,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } end_range: - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); if (nr2) { if (partial2 == chain2) { /* @@ -1425,16 +1424,14 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } goto do_indirects; } /* Punch happened within the same level (n == n2) */ - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); /* Free top, but only if partial2 isn't its subtree. */ if (nr) { @@ -1491,11 +1488,7 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); - return 0; + goto cleanup; } /* @@ -1510,8 +1503,6 @@ end_range: partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } if (partial2 > chain2 && depth2 <= depth) { @@ -1519,11 +1510,21 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } } + +cleanup: + while (p && p > chain) { + BUFFER_TRACE(p->bh, "call brelse"); + brelse(p->bh); + p--; + } + while (p2 && p2 > chain2) { + BUFFER_TRACE(p2->bh, "call brelse"); + brelse(p2->bh); + p2--; + } return 0; do_indirects: @@ -1531,7 +1532,7 @@ do_indirects: switch (offsets[0]) { default: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_IND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); @@ -1539,7 +1540,7 @@ do_indirects: } case EXT4_IND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); @@ -1547,7 +1548,7 @@ do_indirects: } case EXT4_DIND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); @@ -1556,5 +1557,5 @@ do_indirects: case EXT4_TIND_BLOCK: ; } - return 0; + goto cleanup; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 99ed5b9f2b86..6fbcdf23df90 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -715,8 +715,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, if (!PageUptodate(page)) { ret = ext4_read_inline_page(inode, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto out_up_read; + } } ret = 1; @@ -1872,12 +1875,12 @@ int ext4_inline_data_fiemap(struct inode *inode, physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); - if (physical) - error = fiemap_fill_next_extent(fieinfo, start, physical, - inline_len, flags); brelse(iloc.bh); out: up_read(&EXT4_I(inode)->xattr_sem); + if (physical) + error = fiemap_fill_next_extent(fieinfo, start, physical, + inline_len, flags); return (error < 0 ? error : 0); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3a2594665b44..49857101bb57 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -603,6 +603,14 @@ resizefs_out: if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q)) return -EOPNOTSUPP; + + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bad13f049fb0..5223eb25bf59 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -907,11 +907,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } @@ -1600,7 +1607,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } if (reserved_gdb || gdb_off == 0) { - if (ext4_has_feature_resize_inode(sb) || + if (!ext4_has_feature_resize_inode(sb) || !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); @@ -1928,7 +1935,8 @@ retry: le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); n_blocks_count = (ext4_fsblk_t)n_group * - EXT4_BLOCKS_PER_GROUP(sb); + EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(es->s_first_data_block); n_group--; /* set to last group number */ } @@ -2039,6 +2047,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2cd426a4161d..0ec3689a8990 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1048,6 +1048,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, ext4_nfs_get_inode); } +static int ext4_nfs_commit_metadata(struct inode *inode) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + trace_ext4_nfs_commit_metadata(inode); + return ext4_write_inode(inode, &wbc); +} + /* * Try to release metadata pages (indirect blocks, directories) which are * mapped via the block device. Since these pages could have journal heads @@ -1142,6 +1152,7 @@ static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, + .commit_metadata = ext4_nfs_commit_metadata, }; enum { @@ -5187,9 +5198,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, qf_inode->i_flags |= S_NOQUOTA; lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); err = dquot_enable(qf_inode, type, format_id, flags); - iput(qf_inode); if (err) lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); + iput(qf_inode); return err; } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index f82f916e1f2c..7ebf7b958f9e 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, return (void *)f2fs_acl; fail: - kfree(f2fs_acl); + kvfree(f2fs_acl); return ERR_PTR(-EINVAL); } @@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = NULL; else acl = ERR_PTR(retval); - kfree(value); + kvfree(value); if (!IS_ERR(acl)) set_cached_acl(inode, type, acl); @@ -243,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); - kfree(value); + kvfree(value); if (!error) set_cached_acl(inode, type, acl); @@ -355,12 +355,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return PTR_ERR(p); clone = f2fs_acl_clone(p, GFP_NOFS); - if (!clone) - goto no_mem; + if (!clone) { + ret = -ENOMEM; + goto release_acl; + } ret = f2fs_acl_create_masq(clone, mode); if (ret < 0) - goto no_mem_clone; + goto release_clone; if (ret == 0) posix_acl_release(clone); @@ -374,11 +376,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return 0; -no_mem_clone: +release_clone: posix_acl_release(clone); -no_mem: +release_acl: posix_acl_release(p); - return -ENOMEM; + return ret; } int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4ed2d2a0bf02..06554ea87b0a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -44,7 +44,7 @@ repeat: cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -307,8 +307,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* collect a number of dirty meta pages and write together */ - if (wbc->for_kupdate || - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_META) < + nr_pages_to_skip(sbi, META)) goto skip_write; /* if locked failed, cp will flush dirty pages instead */ @@ -371,9 +372,8 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -407,7 +407,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -912,7 +912,7 @@ free_fail_no_cp: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); + kvfree(sbi->ckpt); return -EINVAL; } @@ -958,7 +958,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); } @@ -1261,10 +1261,17 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK)) + __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); - else - __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* + * TODO: we count on fsck.f2fs to clear this flag until we figure out + * missing cases which clear it incorrectly. + */ if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); @@ -1291,11 +1298,11 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; - memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); + f2fs_wait_on_page_writeback(page, META, true, true); - f2fs_wait_on_page_writeback(page, META, true); - f2fs_bug_on(sbi, PageWriteback(page)); + memcpy(page_address(page), src, PAGE_SIZE); + + set_page_dirty(page); if (unlikely(!clear_page_dirty_for_io(page))) f2fs_bug_on(sbi, 1); @@ -1329,11 +1336,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint @@ -1406,14 +1411,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, - FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } } /* write out checkpoint buffer at block 0 */ @@ -1449,6 +1446,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ f2fs_wait_on_all_pages_writeback(sbi); @@ -1466,7 +1465,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * invalidate intermediate page cache borrowed from meta inode * which are used for migration of encrypted inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi->sb)) + if (f2fs_sb_has_encrypt(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8286f2114c37..11f38e2a1a27 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -142,6 +142,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { f2fs_show_injection_info(FAULT_READ_IO); bio->bi_error = -EIO; @@ -155,6 +157,13 @@ static void f2fs_read_end_io(struct bio *bio) return; } + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + __read_end_io(bio); } @@ -297,9 +306,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, for (; start < F2FS_IO_SIZE(sbi); start++) { struct page *page = mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + GFP_NOIO | __GFP_NOFAIL); f2fs_bug_on(sbi, !page); + zero_user_segment(page, 0, PAGE_SIZE); SetPagePrivate(page); set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); lock_page(page); @@ -321,6 +331,32 @@ submit_io: submit_bio(bio_op(bio), bio); } +static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + if (trace_android_fs_dataread_start_enabled() && (type == DATA)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -370,29 +406,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, return false; } -static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - enum temp_type temp; - struct f2fs_bio_info *io; - bool ret = false; - - for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { - io = sbi->write_io[btype] + temp; - - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); - up_read(&io->io_rwsem); - - /* TODO: use HOT temp only for meta pages now. */ - if (ret || btype == META) - break; - } - return ret; -} - static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { @@ -418,13 +431,19 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type, bool force) { enum temp_type temp; - - if (!force && !has_merged_page(sbi, inode, page, ino, type)) - return; + bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + if (!force) { + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - __f2fs_submit_merged_write(sbi, type, temp); + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, page, ino); + up_read(&io->io_rwsem); + } + if (ret) + __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) @@ -485,7 +504,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) inc_page_count(fio->sbi, is_read_io(fio->op) ? __read_io_type(page): WB_DATA_TYPE(fio->page)); - __submit_bio(fio->sbi, bio, fio->type); + __f2fs_submit_read_bio(fio->sbi, bio, fio->type); return 0; } @@ -615,7 +634,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -641,7 +660,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) */ void f2fs_set_data_blkaddr(struct dnode_of_data *dn) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn); if (set_page_dirty(dn->node_page)) dn->node_changed = true; @@ -671,7 +690,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = datablock_addr(dn->inode, @@ -955,6 +974,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } + if (direct_io && allow_outplace_dio(inode, iocb, from)) + return 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -968,6 +990,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = true; if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); @@ -1026,7 +1049,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE; + int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -1046,6 +1069,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) + goto next_dnode; + map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; @@ -1060,7 +1087,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } next_dnode: - if (create) + if (map->m_may_create) __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ @@ -1097,11 +1124,13 @@ next_block: if (is_valid_data_blkaddr(sbi, blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && create && - flag == F2FS_GET_BLOCK_DIO) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); - if (!err) + if (!err) { + blkaddr = dn.data_blkaddr; set_inode_flag(inode, FI_APPEND_WRITE); + } } } else { if (create) { @@ -1207,7 +1236,7 @@ skip: f2fs_put_dnode(&dn); - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1233,7 +1262,7 @@ sync_out: } f2fs_put_dnode(&dn); unlock_out: - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1255,6 +1284,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; last_lblk = F2FS_BLK_ALIGN(pos + len); while (map.m_lblk < last_lblk) { @@ -1269,7 +1299,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs, int seg_type) + pgoff_t *next_pgofs, int seg_type, bool may_write) { struct f2fs_map_blocks map; int err; @@ -1279,6 +1309,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; + map.m_may_create = may_write; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1295,16 +1326,25 @@ static int get_data_block(struct inode *inode, sector_t iblock, { return __get_data_block(inode, iblock, bh_result, create, flag, next_pgofs, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); +} + +static int get_data_block_dio_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return __get_data_block(inode, iblock, bh_result, create, + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL, - f2fs_rw_hint_to_seg_type( - inode->i_write_hint)); + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + false); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1316,7 +1356,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1523,6 +1563,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; for (; nr_pages; nr_pages--) { if (pages) { @@ -1542,6 +1583,9 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (last_block > last_block_in_file) last_block = last_block_in_file; + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; /* * Map blocks using the previous result first. */ @@ -1554,16 +1598,11 @@ static int f2fs_mpage_readpages(struct address_space *mapping, * Then do more f2fs_map_blocks() calls until we are * done with this page. */ - map.m_flags = 0; + map.m_lblk = block_in_file; + map.m_len = last_block - block_in_file; - if (block_in_file < last_block) { - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, - F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; - } + if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT)) + goto set_error_page; got_it: if ((map.m_flags & F2FS_MAP_MAPPED)) { block_nr = map.m_pblk + block_in_file - map.m_lblk; @@ -1578,6 +1617,7 @@ got_it: DATA_GENERIC)) goto set_error_page; } else { +zero_out: zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) SetPageUptodate(page); @@ -1592,7 +1632,7 @@ got_it: if (bio && (last_block_in_bio != block_nr - 1 || !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { @@ -1624,7 +1664,7 @@ set_error_page: goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } unlock_page(page); @@ -1634,7 +1674,7 @@ next_page: } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1852,6 +1892,13 @@ got_it: if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); + if (err) { + if (f2fs_encrypted_file(inode)) + fscrypt_pullback_bio_page(&fio->encrypted_page, + true); + if (PageWriteback(page)) + end_page_writeback(page); + } trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); return err; @@ -2139,12 +2186,11 @@ continue_unlock: if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, - DATA, true); + DATA, true, true); else goto continue_unlock; } - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -2303,7 +2349,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - f2fs_truncate_blocks(inode, i_size, true, true); + if (!IS_NOQUOTA(inode)) + f2fs_truncate_blocks(inode, i_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2321,6 +2368,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, bool locked = false; struct extent_info ei = {0,0,0}; int err = 0; + int flag; /* * we already allocated all the blocks, so we don't need to get @@ -2330,9 +2378,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; + /* f2fs_lock_op avoids race between write CP and convert_inline_page */ + if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) + flag = F2FS_GET_BLOCK_DEFAULT; + else + flag = F2FS_GET_BLOCK_PRE_AIO; + if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + __do_map_lock(sbi, flag, true); locked = true; } restart: @@ -2370,6 +2424,7 @@ restart: f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } @@ -2383,7 +2438,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + __do_map_lock(sbi, flag, false); return err; } @@ -2464,7 +2519,7 @@ repeat: } } - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); if (len == PAGE_SIZE || PageUptodate(page)) return 0; @@ -2556,6 +2611,50 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } +static void f2fs_dio_end_io(struct bio *bio) +{ + struct f2fs_private_dio *dio = bio->bi_private; + + dec_page_count(F2FS_I_SB(dio->inode), + dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + bio->bi_private = dio->orig_private; + bio->bi_end_io = dio->orig_end_io; + + kvfree(dio); + + bio_endio(bio); +} + +static void f2fs_dio_submit_bio(int rw, struct bio *bio, struct inode *inode, + loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (rw == REQ_OP_WRITE); + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) + goto out; + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + submit_bio(rw, bio); + return; +out: + bio->bi_error = -EIO; + bio_endio(bio); +} + static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { @@ -2625,7 +2724,11 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, down_read(&fi->i_gc_rwsem[READ]); } - err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); + err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, + rw == WRITE ? get_data_block_dio_write : + get_data_block_dio, NULL, f2fs_dio_submit_bio, + DIO_LOCKING | DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); @@ -2680,12 +2783,10 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, clear_cold_data(page); - /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); } int f2fs_release_page(struct page *page, gfp_t wait) @@ -2699,8 +2800,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0; clear_cold_data(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); return 1; } @@ -2768,12 +2868,8 @@ int f2fs_migrate_page(struct address_space *mapping, return -EAGAIN; } - /* - * A reference is expected if PagePrivate set when move mapping, - * however F2FS breaks this for maintaining dirty page counts when - * truncating pages. So here adjusting the 'extra_count' make it work. - */ - extra_count = (atomic_written ? 1 : 0) - page_has_private(page); + /* one extra reference was held for atomic_write page */ + extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { @@ -2794,9 +2890,10 @@ int f2fs_migrate_page(struct address_space *mapping, get_page(newpage); } - if (PagePrivate(page)) - SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + if (PagePrivate(page)) { + f2fs_set_page_private(newpage, page_private(page)); + f2fs_clear_page_private(page); + } migrate_page_copy(newpage, page); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 139b4d5c83d5..d00ba9b711a4 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -53,6 +53,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); + si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); @@ -62,7 +64,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = - atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + atomic_read(&SM_I(sbi)->fcc_info->queued_flush); si->flush_list_empty = llist_empty(&SM_I(sbi)->fcc_info->issue_list); } @@ -70,7 +72,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = - atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + atomic_read(&SM_I(sbi)->dcc_info->queued_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; @@ -94,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); - si->node_pages = NODE_MAPPING(sbi)->nrpages; - si->meta_pages = META_MAPPING(sbi)->nrpages; + if (sbi->node_inode) + si->node_pages = NODE_MAPPING(sbi)->nrpages; + if (sbi->meta_inode) + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); @@ -173,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; int i; if (si->base_mem) @@ -197,7 +200,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); @@ -256,10 +259,14 @@ get_cache: sizeof(struct extent_node); si->page_mem = 0; - npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + if (sbi->node_inode) { + unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } + if (sbi->meta_inode) { + unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } } static int stat_show(struct seq_file *s, void *v) @@ -374,6 +381,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - DIO (R: %4d, W: %4d)\n", + si->nr_dio_read, si->nr_dio_write); seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " @@ -510,33 +519,19 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(si); + kvfree(si); } -int __init f2fs_create_root_stats(void) +void __init f2fs_create_root_stats(void) { - struct dentry *file; - f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); - if (!f2fs_debugfs_root) - return -ENOMEM; - file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, - NULL, &stat_fops); - if (!file) { - debugfs_remove(f2fs_debugfs_root); - f2fs_debugfs_root = NULL; - return -ENOMEM; - } - - return 0; + debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, + &stat_fops); } void f2fs_destroy_root_stats(void) { - if (!f2fs_debugfs_root) - return; - debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index eedc07b46a52..988af0b594a7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -293,7 +293,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, type, true); + f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); set_page_dirty(page); @@ -307,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); @@ -550,7 +550,7 @@ start: ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true); + f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); @@ -705,7 +705,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; @@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_radix_tree_dirty_tag(page); clear_page_dirty_for_io(page); - ClearPagePrivate(page); + f2fs_clear_page_private(page); ClearPageUptodate(page); clear_cold_data(page); inode_dec_dirty_pages(dir); @@ -800,6 +800,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (de->name_len == 0) { bit_pos++; ctx->pos = start_pos + bit_pos; + printk_ratelimited( + "%s, invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, le32_to_cpu(de->ino)); + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } @@ -808,6 +812,18 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); + /* check memory boundary before moving forward */ + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + if (unlikely(bit_pos > d->max || + le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + goto out; + } + if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; @@ -830,7 +846,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } out: @@ -881,7 +896,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_get_lock_data_page(inode, n, false); + dentry_page = f2fs_find_data_page(inode, n); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { @@ -899,11 +914,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); break; } - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); } out_free: fscrypt_fname_free_buffer(&fstr); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b5233ce704fe..2c10b09d9bcf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -69,7 +69,7 @@ struct f2fs_fault_info { unsigned int inject_type; }; -extern char *f2fs_fault_name[FAULT_MAX]; +extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif @@ -156,12 +156,13 @@ struct f2fs_mount_info { #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) +#define F2FS_SET_FEATURE(sbi, mask) \ + (sbi->raw_super->feature |= cpu_to_le32(mask)) +#define F2FS_CLEAR_FEATURE(sbi, mask) \ + (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* bio stuffs */ #define REQ_OP_READ READ @@ -255,6 +256,8 @@ enum { #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ struct cp_control { int reason; @@ -318,7 +321,7 @@ struct discard_entry { /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ - (MAX_PLIST_NUM - 1) : (blk_num - 1)) + (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) enum { D_PREP, /* initial */ @@ -349,7 +352,7 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ - unsigned char issuing; /* issuing discard */ + unsigned char queued; /* queued discard */ int error; /* bio error */ spinlock_t lock; /* for state/bio_ref updating */ unsigned short bio_ref; /* bio reference count */ @@ -374,6 +377,7 @@ struct discard_policy { bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ unsigned int granularity; /* discard granularity */ + int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { @@ -391,7 +395,7 @@ struct discard_cmd_control { unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ @@ -481,6 +485,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -516,7 +521,6 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 -#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); static inline int get_inline_xattr_addrs(struct inode *inode); #define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ @@ -619,16 +623,8 @@ struct extent_info { }; struct extent_node { - struct rb_node rb_node; - union { - struct { - unsigned int fofs; - unsigned int len; - u32 blk; - }; - struct extent_info ei; /* extent info */ - - }; + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; @@ -663,6 +659,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; + bool m_may_create; /* indicate it is from write path */ }; /* for flag in get_data_block */ @@ -951,7 +948,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -1018,6 +1015,8 @@ enum count_type { F2FS_RD_DATA, F2FS_RD_NODE, F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -1164,6 +1163,7 @@ enum { SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */ SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ @@ -1175,6 +1175,7 @@ enum { DISCARD_TIME, GC_TIME, DISABLE_TIME, + UMOUNT_DISCARD_TIMEOUT, MAX_TIME, }; @@ -1232,8 +1233,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; - /* bio ordering for NODE/DATA */ /* keep migration IO order for LFS mode */ struct rw_semaphore io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ @@ -1305,8 +1304,6 @@ struct f2fs_sb_info { unsigned int nquota_files; /* # of quota sysfile */ - u32 s_next_generation; /* for NFS support */ - /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ @@ -1325,6 +1322,7 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ @@ -1334,6 +1332,8 @@ struct f2fs_sb_info { /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; /* * for stat information. @@ -1392,6 +1392,13 @@ struct f2fs_sb_info { __u32 s_chksum_seed; }; +struct f2fs_private_dio { + struct inode *inode; + void *orig_private; + bio_end_io_t *orig_end_io; + bool write; +}; + #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(type) \ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ @@ -1670,12 +1677,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + kvfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); @@ -1852,13 +1863,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || - count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || - count_type == F2FS_RD_META) - return; - - set_sbi_flag(sbi, SBI_IS_DIRTY); + if (count_type == F2FS_DIRTY_DENTS || + count_type == F2FS_DIRTY_NODES || + count_type == F2FS_DIRTY_META || + count_type == F2FS_DIRTY_QDATA || + count_type == F2FS_DIRTY_IMETA) + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) @@ -2208,8 +2218,19 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || - get_pages(sbi, F2FS_WB_CP_DATA)) + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE)) return false; + + if (SM_I(sbi) && SM_I(sbi)->dcc_info && + atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) + return false; + + if (SM_I(sbi) && SM_I(sbi)->fcc_info && + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return false; + return f2fs_time_over(sbi, type); } @@ -2350,11 +2371,12 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ +#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */ #define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */ +#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */ #define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ /* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ @@ -2432,6 +2454,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_NEW_INODE: if (set) return; + /* fall through */ case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: @@ -2734,24 +2757,22 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT)) return false; + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + return S_ISREG(inode->i_mode); } -static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, - size_t size, gfp_t flags) -{ - if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); - return NULL; - } - - return kmalloc(size, flags); -} - static inline void *kvmalloc(size_t size, gfp_t flags) { void *ret; @@ -2762,6 +2783,23 @@ static inline void *kvmalloc(size_t size, gfp_t flags) return ret; } +static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + void *ret; + + if (time_to_inject(sbi, FAULT_KMALLOC)) { + f2fs_show_injection_info(FAULT_KMALLOC); + return NULL; + } + + ret = kmalloc(size, flags); + if (ret) + return ret; + + return kvmalloc(size, flags); +} + static inline void *kvzalloc(size_t size, gfp_t flags) { void *ret; @@ -2815,9 +2853,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode) #define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) #define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ - ((offsetof(typeof(*f2fs_inode), field) + \ + ((offsetof(typeof(*(f2fs_inode)), field) + \ sizeof((f2fs_inode)->field)) \ - <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { @@ -2844,8 +2882,10 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } -#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ - (!is_read_io(fio->op) || fio->is_meta)) +#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) + +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \ + (!is_read_io((fio)->op) || (fio)->is_meta)) bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); @@ -2877,13 +2917,33 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, return true; } +static inline void f2fs_set_page_private(struct page *page, + unsigned long data) +{ + if (PagePrivate(page)) + return; + + get_page(page); + SetPagePrivate(page); + set_page_private(page, data); +} + +static inline void f2fs_clear_page_private(struct page *page) +{ + if (!PagePrivate(page)) + return; + + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); +} + /* * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, - bool buf_write); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -3056,7 +3116,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); @@ -3088,7 +3148,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); + enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len); @@ -3228,6 +3288,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; @@ -3377,7 +3438,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); -int __init f2fs_create_root_stats(void); +void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) do { } while (0) @@ -3415,7 +3476,7 @@ void f2fs_destroy_root_stats(void); static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline int __init f2fs_create_root_stats(void) { return 0; } +static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } #endif @@ -3539,9 +3600,9 @@ static inline bool f2fs_post_read_required(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct super_block *sb) \ +static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); @@ -3571,7 +3632,7 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - return f2fs_sb_has_blkzoned(sbi->sb); + return f2fs_sb_has_blkzoned(sbi); } static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) @@ -3646,7 +3707,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) return true; if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) @@ -3664,12 +3725,10 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, #define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) #endif -#endif - static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sbi->sb)) + if (f2fs_sb_has_quota_ino(sbi)) return true; if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || @@ -3678,3 +3737,5 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) #endif return false; } + +#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c61f9619be1..637a9dfbd578 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -85,7 +85,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, } /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -219,6 +219,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, trace_f2fs_sync_file_enter(inode); + if (S_ISDIR(inode->i_mode)) + goto go_write; + /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(inode, FI_NEED_IPU); @@ -578,7 +581,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -589,8 +592,7 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, - bool buf_write) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -598,7 +600,6 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, int count = 0, err = 0; struct page *ipage; bool truncate_page = false; - int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); @@ -608,7 +609,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, goto free_partial; if (lock) - __do_map_lock(sbi, flag, true); + f2fs_lock_op(sbi); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -646,7 +647,7 @@ free_next: err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - __do_map_lock(sbi, flag, false); + f2fs_unlock_op(sbi); free_partial: /* lastly zero out the first data page */ if (!err) @@ -681,7 +682,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true); if (err) return err; @@ -700,7 +701,7 @@ int f2fs_getattr(struct vfsmount *mnt, unsigned int flags; if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_inode_crtime(inode->i_sb) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = fi->i_crtime.tv_sec; @@ -769,7 +770,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; - bool size_changed = false; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; @@ -844,8 +844,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) down_write(&F2FS_I(inode)->i_sem); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); - - size_changed = true; } __setattr_copy(inode, attr); @@ -859,7 +857,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } /* file size may changed here */ - f2fs_mark_inode_dirty_sync(inode, size_changed); + f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ f2fs_balance_fs(F2FS_I_SB(inode), true); @@ -899,7 +897,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); @@ -1266,7 +1264,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true, false); + ret = f2fs_truncate_blocks(inode, new_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1451,7 +1449,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; @@ -1503,7 +1501,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1654,6 +1653,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) flags |= F2FS_ENCRYPT_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) flags |= F2FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + flags |= F2FS_NOCOW_FL; flags &= F2FS_FL_USER_VISIBLE; @@ -1746,10 +1747,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto skip_flush; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. + */ + if (get_dirty_pages(inode)) + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); @@ -1757,7 +1760,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -skip_flush: + set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1962,6 +1965,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + set_sbi_flag(sbi, SBI_IS_DIRTY); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + goto out; default: ret = -EINVAL; goto out; @@ -1977,6 +1987,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) out: if (in != F2FS_GOING_DOWN_FULLSYNC) mnt_drop_write_file(filp); + + trace_f2fs_shutdown(sbi, in, ret); + return ret; } @@ -2030,7 +2043,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -2040,7 +2053,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -2051,7 +2064,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2155,7 +2168,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: @@ -2197,7 +2210,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE }; + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; @@ -2560,7 +2574,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) return -EFAULT; if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { + __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d for segs_per_sec %u != 1\n", range.dev_num, sbi->s_ndevs, @@ -2639,8 +2653,8 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!inode_owner_or_capable(inode)) - return -EACCES; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; @@ -2711,6 +2725,7 @@ int f2fs_precache_extents(struct inode *inode) map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; end = F2FS_I_SB(inode)->max_file_blocks; while (map.m_lblk < end) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 843430b55a73..3624d1336bd1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -142,7 +142,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } out: @@ -155,7 +155,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } @@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; @@ -333,6 +332,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.max_search == 0) goto out; + if (__is_large_section(sbi) && p.alloc_mode == LFS) { + if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[BG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + goto got_result; + } + if (gc_type == FG_GC && + sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[FG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + goto got_result; + } + } + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); @@ -395,6 +410,8 @@ next: } if (p.min_segno != NULL_SEGNO) { got_it: + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; +got_result: if (p.alloc_mode == LFS) { secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) @@ -402,13 +419,13 @@ got_it: else set_bit(secno, dirty_i->victim_secmap); } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + } +out: + if (p.min_segno != NULL_SEGNO) trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, prefree_segments(sbi), free_segments(sbi)); - } -out: mutex_unlock(&dirty_i->seglist_lock); return (p.min_segno == NULL_SEGNO) ? 0 : 1; @@ -658,6 +675,14 @@ got_it: fio.page = page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + /* + * don't cache encrypted data into meta inode until previous dirty + * data were writebacked to avoid racing between GC and flush. + */ + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), dn.data_blkaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -743,7 +768,9 @@ static int move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); if (err) @@ -802,8 +829,8 @@ static int move_data_block(struct inode *inode, block_t bidx, } write_page: + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); set_page_dirty(fio.encrypted_page); - f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -811,7 +838,7 @@ write_page: ClearPageError(page); /* allocate block address */ - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC | REQ_NOIDLE; @@ -897,8 +924,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, bool is_dirty = PageDirty(page); retry: + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); @@ -1093,15 +1121,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; int submitted = 0; + if (__is_large_section(sbi)) + end_segno = rounddown(end_segno, sbi->segs_per_sec); + /* readahead multi ssa blocks those have contiguous address */ - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - sbi->segs_per_sec, META_SSA, true); + end_segno - segno, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { @@ -1130,10 +1161,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) - goto next; + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (__is_large_section(sbi) && + migrated >= sbi->migration_granularity) + goto skip; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) + goto skip; sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { @@ -1141,7 +1175,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, "type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - goto next; + goto skip; } /* @@ -1160,10 +1194,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; -next: + migrated++; + + if (__is_large_section(sbi) && segno + 1 < end_segno) + sbi->next_victim_seg[gc_type] = segno + 1; +skip: f2fs_put_page(sum_page, 0); } @@ -1307,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && sbi->segs_per_sec == 1) + if (sbi->s_ndevs && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fdf4abcf2dab..dcc9ba43d627 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -73,7 +73,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, addr = inline_data_addr(inode, ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); @@ -179,7 +179,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); f2fs_outplace_write_data(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); f2fs_remove_dirty_inode(dn->inode); @@ -254,7 +254,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); + f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(inode, dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); @@ -295,7 +295,7 @@ process_inline: ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); src_addr = inline_data_addr(inode, npage); dst_addr = inline_data_addr(inode, ipage); @@ -316,7 +316,7 @@ process_inline: clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false, false)) + if (f2fs_truncate_blocks(inode, 0, false)) return false; goto process_inline; } @@ -409,7 +409,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; } - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); @@ -488,7 +488,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - f2fs_truncate_blocks(dir, 0, false, false); + f2fs_truncate_blocks(dir, 0, false); f2fs_remove_dirty_inode(dir); return err; } @@ -519,18 +519,18 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - kfree(backup_dentry); + kvfree(backup_dentry); return 0; recover: lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); - kfree(backup_dentry); + kvfree(backup_dentry); return err; } @@ -583,7 +583,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, } } - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); name_hash = f2fs_dentry_hash(new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); @@ -615,7 +615,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); inline_dentry = inline_data_addr(dir, page); make_dentry_ptr_inline(dir, &d, inline_dentry); @@ -677,6 +677,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (IS_ERR(ipage)) return PTR_ERR(ipage); + /* + * f2fs_readdir was protected by inode.i_rwsem, it is safe to access + * ipage without page's lock held. + */ + unlock_page(ipage); + inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -685,7 +691,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (!err) ctx->pos = d.max; - f2fs_put_page(ipage, 1); + f2fs_put_page(ipage, 0); return err < 0 ? err : 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1b1f22faff22..d33f860c4e63 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -14,6 +14,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "xattr.h" #include @@ -103,7 +104,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); set_raw_inline(inode, F2FS_INODE(ipage)); @@ -118,7 +119,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page { struct f2fs_inode *ri = &F2FS_NODE(page)->i; - if (!f2fs_sb_has_inode_chksum(sbi->sb)) + if (!f2fs_sb_has_inode_chksum(sbi)) return false; if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) @@ -218,7 +219,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) + if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, @@ -228,7 +229,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } if (f2fs_has_extra_attr(inode) && - !f2fs_sb_has_extra_attr(sbi->sb)) { + !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) is with extra_attr, " @@ -248,6 +249,20 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (!fi->i_inline_xattr_size || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) has corrupted " + "i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); + return false; + } + if (F2FS_I(inode)->extent_tree) { struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; @@ -340,7 +355,7 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -390,14 +405,14 @@ static int do_read_inode(struct inode *inode) if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) i_projid = (projid_t)le32_to_cpu(ri->i_projid); else i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); @@ -497,7 +512,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); f2fs_inode_synced(inode); @@ -542,11 +557,11 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); - if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { projid_t i_projid; @@ -556,7 +571,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_projid = cpu_to_le32(i_projid); } - if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_crtime)) { ri->i_crtime = diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e50f43fa6afc..9364ad8a564e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = F2FS_I(inode)->i_crtime = current_time(inode); - inode->i_generation = sbi->s_next_generation++; + inode->i_generation = prandom_u32(); if (S_ISDIR(inode->i_mode)) F2FS_I(inode)->i_current_depth = 1; @@ -61,7 +62,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto fail; } - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else @@ -79,7 +80,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_sb_has_extra_attr(sbi->sb)) { + if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } @@ -92,7 +93,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) xattr_size = F2FS_OPTION(sbi).inline_xattr_size; @@ -632,7 +633,7 @@ out_f2fs_handle_failed_inode: f2fs_handle_failed_inode(inode); out_free_encrypted_link: if (disk_link.name != (unsigned char *)symname) - kfree(disk_link.name); + kvfree(disk_link.name); return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 08edc0930831..f527c2b08798 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -826,6 +826,7 @@ static int truncate_node(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; int err; + pgoff_t index; err = f2fs_get_node_info(sbi, dn->nid, &ni); if (err) @@ -845,10 +846,11 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); + index = dn->node_page->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), - dn->node_page->index, dn->node_page->index); + index, index); dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); @@ -1104,7 +1106,7 @@ skip_partial: ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); @@ -1232,7 +1234,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) @@ -1598,10 +1600,10 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) .for_reclaim = 0, }; - set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); + + set_page_dirty(node_page); - f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); if (!clear_page_dirty_for_io(node_page)) { err = -EAGAIN; goto out_page; @@ -1689,8 +1691,7 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true); - BUG_ON(PageWriteback(page)); + f2fs_wait_on_page_writeback(page, NODE, true, true); set_fsync_mark(page, 0); set_dentry_mark(page, 0); @@ -1741,7 +1742,7 @@ continue_unlock: "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true); + f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; @@ -1822,9 +1823,8 @@ continue_unlock: goto lock_node; } - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1891,7 +1891,7 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, get_page(page); spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, false); if (TestClearPageError(page)) ret = -EIO; @@ -1925,7 +1925,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, f2fs_balance_fs_bg(sbi); /* collect a number of dirty node pages and write together */ - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_NODES) < + nr_pages_to_skip(sbi, NODE)) goto skip_write; if (wbc->sync_mode == WB_SYNC_ALL) @@ -1964,7 +1966,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -2472,7 +2474,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: f2fs_update_inode(inode, ipage); @@ -2566,17 +2568,17 @@ retry: if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + if (f2fs_sb_has_flexible_inline_xattr(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_inline_xattr_size)) dst->i_inline_xattr_size = src->i_inline_xattr_size; - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; - if (f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_crtime_nsec)) { dst->i_crtime = src->i_crtime; @@ -3118,17 +3120,17 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) kvfree(nm_i->free_nid_bitmap[i]); - kfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_bitmap); } kvfree(nm_i->free_nid_count); - kfree(nm_i->nat_bitmap); - kfree(nm_i->nat_bits); + kvfree(nm_i->nat_bitmap); + kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kfree(nm_i->nat_bitmap_mir); + kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kfree(nm_i); + kvfree(nm_i); } int __init f2fs_create_node_manager_caches(void) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1c73d879a9bc..e05af5df5648 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -361,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - f2fs_wait_on_page_writeback(p, NODE, true); + f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 336273a81fba..aa3f5633a20c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -250,7 +250,7 @@ static int recover_inode(struct inode *inode, struct page *page) i_gid_write(inode, le32_to_cpu(raw->i_gid)); if (raw->i_inline & F2FS_EXTRA_ATTR) { - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), i_projid)) { projid_t i_projid; @@ -531,7 +531,7 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); err = f2fs_get_node_info(sbi, dn.nid, &ni); if (err) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fb4be4629add..cbfa80812a66 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) f2fs_trace_pid(page); - set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); - SetPagePrivate(page); + f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@ -215,7 +214,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) } static int __revoke_inmem_pages(struct inode *inode, - struct list_head *head, bool drop, bool recover) + struct list_head *head, bool drop, bool recover, + bool trylock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inmem_pages *cur, *tmp; @@ -227,9 +227,18 @@ static int __revoke_inmem_pages(struct inode *inode, if (drop) trace_f2fs_commit_inmem_page(page, INMEM_DROP); - lock_page(page); + if (trylock) { + /* + * to avoid deadlock in between page lock and + * inmem_lock. + */ + if (!trylock_page(page)) + continue; + } else { + lock_page(page); + } - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (recover) { struct dnode_of_data dn; @@ -270,8 +279,7 @@ next: ClearPageUptodate(page); clear_cold_data(page); } - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 1); list_del(&cur->list); @@ -318,13 +326,19 @@ void f2fs_drop_inmem_pages(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - mutex_lock(&fi->inmem_lock); - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - mutex_unlock(&fi->inmem_lock); + while (!list_empty(&fi->inmem_pages)) { + mutex_lock(&fi->inmem_lock); + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, true); + + if (list_empty(&fi->inmem_pages)) { + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + } + mutex_unlock(&fi->inmem_lock); + } clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; @@ -354,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) kmem_cache_free(inmem_entry_slab, cur); ClearPageUptodate(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 0); trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); @@ -387,8 +400,9 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); @@ -428,12 +442,15 @@ retry: * recovery or rewrite & commit last transaction. For other * error number, revoking was done by filesystem itself. */ - err = __revoke_inmem_pages(inode, &revoke_list, false, true); + err = __revoke_inmem_pages(inode, &revoke_list, + false, true, false); /* drop all uncommitted pages */ - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, false); } else { - __revoke_inmem_pages(inode, &revoke_list, false, false); + __revoke_inmem_pages(inode, &revoke_list, + false, false, false); } return err; @@ -541,9 +558,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(sbi, 0, true); + struct bio *bio; int ret; + bio = f2fs_bio_alloc(sbi, 0, false); + if (!bio) + return -ENOMEM; + bio->bi_rw = REQ_OP_WRITE; bio->bi_bdev = bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); @@ -620,14 +641,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { + atomic_inc(&fcc->queued_flush); ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { ret = submit_flush_wait(sbi, ino); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; @@ -646,14 +669,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct llist_node *list; list = llist_del_all(&fcc->issue_list); if (!list) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct flush_cmd *tmp, *next; @@ -662,7 +685,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { cmd.ret = ret; - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); continue; } tmp->ret = ret; @@ -691,7 +714,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); - atomic_set(&fcc->issing_flush, 0); + atomic_set(&fcc->queued_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -703,7 +726,7 @@ init_thread: "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } @@ -722,7 +745,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) kthread_stop(flush_thread); } if (free) { - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; } } @@ -865,6 +888,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) if (holes[DATA] > ovp || holes[NODE] > ovp) return -EAGAIN; + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && + dirty_segments(sbi) > overprovision_segments(sbi)) + return -EAGAIN; return 0; } @@ -907,7 +933,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; - dc->issuing = 0; + dc->queued = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); @@ -939,7 +965,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_sub(dc->issuing, &dcc->issing_discard); + atomic_sub(dc->queued, &dcc->queued_discard); list_del(&dc->list); rb_erase(&dc->rb_node, &dcc->root); @@ -1114,6 +1140,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->timeout = 0; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1136,6 +1163,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_UMOUNT) { dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; + /* we need to issue all to keep CP_TRIMMED_FLAG */ + dpolicy->granularity = 1; } } @@ -1223,12 +1252,12 @@ submit: dc->bio_ref++; spin_unlock_irqrestore(&dc->lock, flags); - atomic_inc(&dcc->issing_discard); - dc->issuing++; + atomic_inc(&dcc->queued_discard); + dc->queued++; list_move_tail(&dc->list, wait_list); /* sanity check on discard range */ - __check_sit_bitmap(sbi, start, start + len); + __check_sit_bitmap(sbi, lstart, lstart + len); bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; @@ -1497,7 +1526,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, int i, issued = 0; bool io_interrupted = false; + if (dpolicy->timeout != 0) + f2fs_update_time(sbi, dpolicy->timeout); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (i + 1 < dpolicy->granularity) break; @@ -1684,7 +1720,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super */ -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; @@ -1692,6 +1728,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); @@ -1725,6 +1762,10 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) dcc->discard_wake = 0; + /* clean up pending candidates before going to sleep */ + if (atomic_read(&dcc->queued_discard)) + __wait_all_discard_cmd(sbi, NULL); + if (try_to_freeze()) continue; if (f2fs_readonly(sbi->sb)) @@ -1810,7 +1851,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1958,7 +1999,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1990,7 +2031,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2022,7 +2063,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_has_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; @@ -2070,7 +2111,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); - atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; @@ -2086,7 +2127,7 @@ init_thread: "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; return err; } @@ -2103,7 +2144,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) f2fs_stop_discard_thread(sbi); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -2222,7 +2263,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } @@ -2488,7 +2529,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (sbi->segs_per_sec != 1) + if (__is_large_section(sbi)) return CURSEG_I(sbi, type)->segno; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) @@ -2798,7 +2839,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3234,10 +3275,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); - if (!err) + if (!err) { update_device_state(fio); - - f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + } return err; } @@ -3349,16 +3390,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered) + enum page_type type, bool ordered, bool locked) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); - if (ordered) + if (ordered) { wait_on_page_writeback(page); - else + f2fs_bug_on(sbi, locked && PageWriteback(page)); + } else { wait_for_stable_page(page); + } } } @@ -3375,7 +3418,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true); + f2fs_wait_on_page_writeback(cpage, DATA, true, true); f2fs_put_page(cpage, 1); } } @@ -3957,7 +4000,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) if (!sit_i->tmp_map) return -ENOMEM; - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { sit_i->sec_entries = f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), MAIN_SECS(sbi)), @@ -4112,7 +4155,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } @@ -4156,7 +4199,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sbi->discard_blks -= se->valid_blocks; } - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; get_sec_entry(sbi, start)->valid_blocks -= @@ -4391,7 +4434,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); + kvfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) @@ -4403,10 +4446,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { - kfree(array[i].sum_blk); - kfree(array[i].journal); + kvfree(array[i].sum_blk); + kvfree(array[i].journal); } - kfree(array); + kvfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) @@ -4417,7 +4460,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); - kfree(free_i); + kvfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) @@ -4430,26 +4473,26 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); + kvfree(sit_i->sentries[start].cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sentries[start].cur_valid_map_mir); + kvfree(sit_i->sentries[start].cur_valid_map_mir); #endif - kfree(sit_i->sentries[start].ckpt_valid_map); - kfree(sit_i->sentries[start].discard_map); + kvfree(sit_i->sentries[start].ckpt_valid_map); + kvfree(sit_i->sentries[start].discard_map); } } - kfree(sit_i->tmp_map); + kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); + kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->sit_bitmap_mir); #endif - kfree(sit_i); + kvfree(sit_i); } void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) @@ -4465,7 +4508,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; - kfree(sm_info); + kvfree(sm_info); } int __init f2fs_create_segment_manager_caches(void) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ab3465faddf1..5c7ed0442d6e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -333,7 +333,7 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (use_section && sbi->segs_per_sec > 1) + if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; @@ -865,7 +865,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) } } mutex_unlock(&dcc->cmd_lock); - if (!wakeup) + if (!wakeup || !is_idle(sbi, DISCARD_TIME)) return; wake_up: dcc->discard_wake = 1; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 9e13db994fdf..a467aca29cfe 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); spin_lock(&f2fs_list_lock); - list_del(&sbi->s_list); + list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cabcd988e78f..463b21e3cd78 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -38,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *f2fs_fault_name[FAULT_MAX] = { +const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", @@ -260,7 +260,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { f2fs_msg(sb, KERN_INFO, "QUOTA feature is enabled, so ignore qf_name"); return 0; @@ -270,7 +270,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, if (!qname) { f2fs_msg(sb, KERN_ERR, "Not enough memory for storing quotafile name"); - return -EINVAL; + return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) @@ -290,7 +290,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, set_opt(sbi, QUOTA); return 0; errout: - kfree(qname); + kvfree(qname); return ret; } @@ -303,7 +303,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) " when quota turned on"); return -EINVAL; } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -315,7 +315,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); return -1; @@ -349,7 +349,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } } - if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; @@ -400,10 +400,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, BG_GC); set_opt(sbi, FORCE_FG_GC); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -418,7 +418,7 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, DISCARD); break; case Opt_nodiscard: - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -567,11 +567,11 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); - kfree(name); + kvfree(name); return -EINVAL; } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -579,15 +579,15 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "lfs", 3)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg > __ilog2_u32(BIO_MAX_PAGES)) { + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { f2fs_msg(sb, KERN_WARNING, "Not support %d, larger than %d", 1 << arg, BIO_MAX_PAGES); @@ -715,10 +715,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "fs-based", 8)) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); @@ -732,10 +732,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "reuse", 5)) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); @@ -752,14 +752,14 @@ static int parse_options(struct super_block *sb, char *options) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_test_dummy_encryption: #ifdef CONFIG_F2FS_FS_ENCRYPTION - if (!f2fs_sb_has_encrypt(sb)) { + if (!f2fs_sb_has_encrypt(sbi)) { f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); return -EINVAL; } @@ -784,10 +784,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "disable", 7)) { set_opt(sbi, DISABLE_CHECKPOINT); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; default: f2fs_msg(sb, KERN_ERR, @@ -800,13 +800,13 @@ static int parse_options(struct super_block *sb, char *options) if (f2fs_check_quota_options(sbi)) return -EINVAL; #else - if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sb, KERN_ERR, "Filesystem with project quota feature cannot be " "mounted RDWR without CONFIG_QUOTA"); @@ -822,8 +822,10 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { - if (!f2fs_sb_has_extra_attr(sb) || - !f2fs_sb_has_flexible_inline_xattr(sb)) { + int min_size, max_size; + + if (!f2fs_sb_has_extra_attr(sbi) || + !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_msg(sb, KERN_ERR, "extra_attr or flexible_inline_xattr " "feature is off"); @@ -835,14 +837,15 @@ static int parse_options(struct super_block *sb, char *options) "set with inline_xattr option"); return -EINVAL; } - if (!F2FS_OPTION(sbi).inline_xattr_size || - F2FS_OPTION(sbi).inline_xattr_size >= - DEF_ADDRS_PER_INODE - - F2FS_TOTAL_EXTRA_ATTR_SIZE - - DEF_INLINE_RESERVED_SIZE - - DEF_MIN_INLINE_SIZE) { + + min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32); + max_size = MAX_INLINE_XATTR_SIZE; + + if (F2FS_OPTION(sbi).inline_xattr_size < min_size || + F2FS_OPTION(sbi).inline_xattr_size > max_size) { f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range"); + "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } @@ -916,6 +919,10 @@ static int f2fs_drop_inode(struct inode *inode) sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0); + f2fs_submit_merged_write_cond(F2FS_I_SB(inode), + inode, NULL, 0, DATA); + truncate_inode_pages_final(inode->i_mapping); + if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); @@ -1018,10 +1025,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_type); #endif } - kfree(sbi->devs); + kvfree(sbi->devs); } static void f2fs_put_super(struct super_block *sb) @@ -1049,7 +1056,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - dropped = f2fs_wait_discard_bios(sbi); + dropped = f2fs_issue_discard_timeout(sbi); if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && !sbi->discard_blks && !dropped) { @@ -1059,9 +1066,6 @@ static void f2fs_put_super(struct super_block *sb) f2fs_write_checkpoint(sbi, &cpc); } - /* f2fs_write_checkpoint can update stat informaion */ - f2fs_destroy_stats(sbi); - /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. @@ -1079,32 +1083,41 @@ static void f2fs_put_super(struct super_block *sb) f2fs_bug_on(sbi, sbi->fsync_node_num); iput(sbi->node_inode); + sbi->node_inode = NULL; + iput(sbi->meta_inode); + sbi->meta_inode = NULL; + + /* + * iput() can update stat information, if f2fs_write_checkpoint() + * above failed with error. + */ + f2fs_destroy_stats(sbi); /* destroy f2fs internal modules */ f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi->raw_super); + kvfree(sbi->raw_super); destroy_device_list(sbi); if (sbi->write_io_dummy) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); - kfree(sbi); + kvfree(sbi->write_io[i]); + kvfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1434,7 +1447,7 @@ static void default_options(struct f2fs_sb_info *sbi) clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -1455,31 +1468,41 @@ static int f2fs_enable_quotas(struct super_block *sb); static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { + unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; - int err; + int err = 0; + int ret; + if (s_flags & MS_RDONLY) { + f2fs_msg(sbi->sb, KERN_ERR, + "checkpoint=disable on readonly fs"); + return -EINVAL; + } sbi->sb->s_flags |= MS_ACTIVE; - mutex_lock(&sbi->gc_mutex); f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { + mutex_lock(&sbi->gc_mutex); err = f2fs_gc(sbi, true, false, NULL_SEGNO); - if (err == -ENODATA) + if (err == -ENODATA) { + err = 0; break; - if (err && err != -EAGAIN) { - mutex_unlock(&sbi->gc_mutex); - return err; } + if (err && err != -EAGAIN) + break; } - mutex_unlock(&sbi->gc_mutex); - err = sync_filesystem(sbi->sb); - if (err) - return err; + ret = sync_filesystem(sbi->sb); + if (ret || err) { + err = ret ? ret: err; + goto restore_flag; + } - if (f2fs_disable_cp_again(sbi)) - return -EAGAIN; + if (f2fs_disable_cp_again(sbi)) { + err = -EAGAIN; + goto restore_flag; + } mutex_lock(&sbi->gc_mutex); cpc.reason = CP_PAUSE; @@ -1488,7 +1511,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->unusable_block_count = 0; mutex_unlock(&sbi->gc_mutex); - return 0; +restore_flag: + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return err; } static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) @@ -1534,7 +1559,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(org_mount_opt.s_qf_names[j]); + kvfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1578,7 +1603,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); - } else if (f2fs_sb_has_quota_ino(sb)) { + } else if (f2fs_sb_has_quota_ino(sbi)) { err = f2fs_enable_quotas(sb); if (err) goto restore_opts; @@ -1654,7 +1679,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(org_mount_opt.s_qf_names[i]); + kvfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -1675,7 +1700,7 @@ restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -1820,7 +1845,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) int enabled = 0; int i, err; - if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { f2fs_msg(sbi->sb, KERN_ERR, @@ -1851,7 +1876,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, unsigned long qf_inum; int err; - BUG_ON(!f2fs_sb_has_quota_ino(sb)); + BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); qf_inum = f2fs_qf_ino(sb, type); if (!qf_inum) @@ -1996,7 +2021,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) goto out_put; err = dquot_quota_off(sb, type); - if (err || f2fs_sb_has_quota_ino(sb)) + if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb))) goto out_put; inode_lock(inode); @@ -2026,6 +2051,12 @@ void f2fs_quota_off_umount(struct super_block *sb) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } + /* + * In case of checkpoint=disable, we must flush quota blocks. + * This can cause NULL exception for node_inode in end_io, since + * put_super already dropped it. + */ + sync_filesystem(sb); } static void f2fs_truncate_quota_inode_pages(struct super_block *sb) @@ -2179,7 +2210,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, * if LOST_FOUND feature is enabled. * */ - if (f2fs_sb_has_lost_found(sbi->sb) && + if (f2fs_sb_has_lost_found(sbi) && inode->i_ino == F2FS_ROOT_INO(sbi)) return -EPERM; @@ -2402,7 +2433,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, __u32 crc = 0; /* Check checksum_offset and crc in superblock */ - if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { @@ -2502,10 +2533,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %u)", - segment_count, le32_to_cpu(raw_super->block_count)); + "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } @@ -2680,7 +2711,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i, j; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -2698,7 +2729,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; + sbi->migration_granularity = sbi->segs_per_sec; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; @@ -2706,6 +2740,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = + DEF_UMOUNT_DISCARD_TIMEOUT; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) @@ -2716,9 +2752,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - for (i = 0; i < NR_PAGE_TYPE - 1; i++) - for (j = HOT; j < NR_TEMP_TYPE; j++) - mutex_init(&sbi->wio_mutex[i][j]); init_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); @@ -2755,7 +2788,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_has_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2806,7 +2839,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } } - kfree(zones); + kvfree(zones); return err; } @@ -2866,7 +2899,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kfree(super); + kvfree(super); else err = 0; @@ -2886,7 +2919,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* we should update superblock crc here */ - if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + if (!recover && f2fs_sb_has_sb_chksum(sbi)) { crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), offsetof(struct f2fs_super_block, crc)); F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); @@ -2978,7 +3011,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -3032,10 +3065,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct f2fs_super_block *raw_super; struct inode *root; int err; - bool retry = true, need_fsck = false; + bool skip_recovery = false, need_fsck = false; char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; + int retry_cnt = 1; try_onemore: err = -EINVAL; @@ -3074,7 +3108,7 @@ try_onemore: sbi->raw_super = raw_super; /* precompute checksum seed for metadata */ - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); @@ -3084,7 +3118,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -3107,17 +3141,16 @@ try_onemore: sb->s_maxbytes = sbi->max_file_blocks << le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; - if (f2fs_sb_has_quota_ino(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { for (i = 0; i < MAXQUOTAS; i++) { if (f2fs_qf_ino(sbi->sb, i)) sbi->nquota_files++; @@ -3210,6 +3243,10 @@ try_onemore: if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) { + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; + } /* Initialize device list */ err = f2fs_scan_devices(sbi); @@ -3268,36 +3305,36 @@ try_onemore: f2fs_build_gc_manager(sbi); + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; + /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); - goto free_nm; + goto free_stats; } - err = f2fs_build_stats(sbi); - if (err) - goto free_node_inode; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); err = PTR_ERR(root); - goto free_stats; + goto free_node_inode; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; - goto free_stats; + goto free_node_inode; } sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { err = -ENOMEM; - goto free_root_inode; + goto free_node_inode; } err = f2fs_register_sysfs(sbi); @@ -3306,7 +3343,7 @@ try_onemore: #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) f2fs_msg(sb, KERN_ERR, @@ -3319,7 +3356,7 @@ try_onemore: goto free_meta; if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) - goto skip_recovery; + goto reset_checkpoint; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -3336,11 +3373,13 @@ try_onemore: if (need_fsck) set_sbi_flag(sbi, SBI_NEED_FSCK); - if (!retry) - goto skip_recovery; + if (skip_recovery) + goto reset_checkpoint; err = f2fs_recover_fsync_data(sbi, false); if (err < 0) { + if (err != -ENOMEM) + skip_recovery = true; need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); @@ -3356,14 +3395,14 @@ try_onemore: goto free_meta; } } -skip_recovery: +reset_checkpoint: /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) - goto free_meta; + goto sync_free_meta; } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { f2fs_enable_checkpoint(sbi); } @@ -3376,9 +3415,9 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) - goto free_meta; + goto sync_free_meta; } - kfree(options); + kvfree(options); /* recover broken superblock */ if (recovery) { @@ -3396,12 +3435,18 @@ skip_recovery: cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); + clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); return 0; +sync_free_meta: + /* safe to flush all the data */ + sync_filesystem(sbi->sb); + retry_cnt = 0; + free_meta: #ifdef CONFIG_QUOTA f2fs_truncate_quota_inode_pages(sb); - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif /* @@ -3411,49 +3456,53 @@ free_meta: * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); + /* evict some inodes being cached by GC */ + evict_inodes(sb); f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; -free_stats: - f2fs_destroy_stats(sbi); free_node_inode: f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); + sbi->node_inode = NULL; +free_stats: + f2fs_destroy_stats(sbi); free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); free_devices: destroy_device_list(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); + sbi->meta_inode = NULL; free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: destroy_percpu_info(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); + kvfree(sbi->write_io[i]); free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - kfree(options); + kvfree(options); free_sb_buf: - kfree(raw_super); + kvfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi); + kvfree(sbi); /* give only one another chance */ - if (retry) { - retry = false; + if (retry_cnt > 0 && skip_recovery) { + retry_cnt--; shrink_dcache_sb(sb); goto try_onemore; } @@ -3554,9 +3603,7 @@ static int __init init_f2fs_fs(void) err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; - err = f2fs_create_root_stats(); - if (err) - goto free_filesystem; + f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; @@ -3564,7 +3611,6 @@ static int __init init_f2fs_fs(void) free_root_stats: f2fs_destroy_root_stats(); -free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index db89741b219b..d7b47662a0aa 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -90,34 +90,34 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_encrypt(sb)) + if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_has_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); - if (f2fs_sb_has_extra_attr(sb)) + if (f2fs_sb_has_extra_attr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); - if (f2fs_sb_has_project_quota(sb)) + if (f2fs_sb_has_project_quota(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); - if (f2fs_sb_has_flexible_inline_xattr(sb)) + if (f2fs_sb_has_flexible_inline_xattr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); - if (f2fs_sb_has_inode_crtime(sb)) + if (f2fs_sb_has_inode_crtime(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); - if (f2fs_sb_has_lost_found(sb)) + if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); - if (f2fs_sb_has_sb_chksum(sb)) + if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -222,6 +222,8 @@ out: #ifdef CONFIG_F2FS_FAULT_INJECTION if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) return -EINVAL; + if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) + return -EINVAL; #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); @@ -246,6 +248,11 @@ out: return count; } + if (!strcmp(a->attr.name, "migration_granularity")) { + if (t == 0 || t > sbi->segs_per_sec) + return -EINVAL; + } + if (!strcmp(a->attr.name, "trim_sections")) return -EINVAL; @@ -273,10 +280,16 @@ out: return count; } - *ui = t; - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) - f2fs_reset_iostat(sbi); + if (!strcmp(a->attr.name, "iostat_enable")) { + sbi->iostat_enable = !!t; + if (!sbi->iostat_enable) + f2fs_reset_iostat(sbi); + return count; + } + + *ui = (unsigned int)t; + return count; } @@ -406,12 +419,15 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, interval_time[DISCARD_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -460,6 +476,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), + ATTR_LIST(migration_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), @@ -468,6 +485,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), ATTR_LIST(gc_idle_interval), + ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index ce2a5eb210b6..d0ab533a9ce8 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -14,7 +14,7 @@ #include "trace.h" static RADIX_TREE(pids, GFP_ATOMIC); -static struct mutex pids_lock; +static spinlock_t pids_lock; static struct last_io_info last_io; static inline void __print_last_io(void) @@ -58,23 +58,29 @@ void f2fs_trace_pid(struct page *page) set_page_private(page, (unsigned long)pid); +retry: if (radix_tree_preload(GFP_NOFS)) return; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) goto out; if (p) radix_tree_delete(&pids, pid); - f2fs_radix_tree_insert(&pids, pid, current); + if (radix_tree_insert(&pids, pid, current)) { + spin_unlock(&pids_lock); + radix_tree_preload_end(); + cond_resched(); + goto retry; + } trace_printk("%3x:%3x %4x %-16s\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); out: - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); radix_tree_preload_end(); } @@ -119,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) void f2fs_build_trace_ios(void) { - mutex_init(&pids_lock); + spin_lock_init(&pids_lock); } #define PIDVEC_SIZE 128 @@ -147,7 +153,7 @@ void f2fs_destroy_trace_ios(void) pid_t next_pid = 0; unsigned int found; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { unsigned idx; @@ -155,5 +161,5 @@ void f2fs_destroy_trace_ios(void) for (idx = 0; idx < found; idx++) radix_tree_delete(&pids, pid[idx]); } - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c7c3c2a63a85..8c6161ea4e23 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -270,11 +270,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, { struct f2fs_xattr_entry *entry; unsigned int inline_size = inline_xattr_size(inode); + void *max_addr = base_addr + inline_size; list_for_each_xattr(entry, base_addr) { - if ((void *)entry + sizeof(__u32) > base_addr + inline_size || - (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) > - base_addr + inline_size) { + if ((void *)entry + sizeof(__u32) > max_addr || + (void *)XATTR_NEXT_ENTRY(entry) > max_addr) { *last_addr = entry; return NULL; } @@ -285,6 +285,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, if (!memcmp(entry->e_name, name, len)) break; } + + /* inline xattr header or entry across max inline xattr size */ + if (IS_XATTR_LAST_ENTRY(entry) && + (void *)entry + sizeof(__u32) > max_addr) { + *last_addr = entry; + return NULL; + } return entry; } @@ -334,7 +341,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr) + void **base_addr, int *base_size) { void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; @@ -345,8 +352,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), - inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); + *base_size = inline_size + size + XATTR_PADDING_SIZE; + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; @@ -358,8 +365,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); - if (*xe) + if (*xe) { + *base_size = inline_size; goto check; + } } /* read from xattr node block */ @@ -384,7 +393,7 @@ check: *base_addr = txattr_addr; return 0; out: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -427,7 +436,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; fail: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -461,7 +470,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } f2fs_wait_on_page_writeback(ipage ? ipage : in_page, - NODE, true); + NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { err = f2fs_truncate_xattr_node(inode); @@ -485,7 +494,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true); + f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -520,6 +529,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, int error = 0; unsigned int size, len; void *base_addr = NULL; + int base_size; if (name == NULL) return -EINVAL; @@ -530,7 +540,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr); + &entry, &base_addr, &base_size); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -544,11 +554,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (buffer) { char *pval = entry->e_name + entry->e_name_len; + + if (base_size - (pval - (char *)base_addr) < size) { + error = -ERANGE; + goto out; + } memcpy(buffer, pval, size); } error = size; out: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -587,7 +602,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -718,7 +733,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: - kzfree(base_addr); + kvfree(base_addr); return error; } diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 595d2af00a58..12a5ef3a607d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -78,6 +78,12 @@ struct f2fs_xattr_entry { sizeof(struct f2fs_xattr_header) - \ sizeof(struct f2fs_xattr_entry)) +#define MAX_INLINE_XATTR_SIZE \ + (DEF_ADDRS_PER_INODE - \ + F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ + DEF_INLINE_RESERVED_SIZE - \ + MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) + /* * On-disk structure of f2fs_xattr * We use inline xattrs space + 1 block for xattr. diff --git a/fs/file.c b/fs/file.c index 39f8f15921da..7e9eb65a2912 100644 --- a/fs/file.c +++ b/fs/file.c @@ -474,6 +474,7 @@ struct files_struct init_files = { .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), + .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), }; static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 7a182c87f378..ab1d7f35f6c2 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob if (awaken) wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); + if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + /* Prevent a race with our last child, which has to signal EV_CLEARED * before dropping our spinlock. diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e98a75233f68..d74d959057bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1745,7 +1745,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_CACHE_SHIFT; @@ -1760,6 +1759,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].offset = offset; req->page_descs[req->num_pages].length = this_num; req->num_pages++; @@ -2084,10 +2084,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); + pipe_lock(pipe); for (idx = 0; idx < nbuf; idx++) { struct pipe_buffer *buf = &bufs[idx]; buf->ops->release(pipe, buf); } + pipe_unlock(pipe); + out: kfree(bufs); return ret; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7014318f6d18..d40c2451487c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1784,7 +1784,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, spin_unlock(&fc->lock); dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_zone_page_state(page, NR_WRITEBACK_TEMP); + dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 1ab19e660e69..1ff5774a5382 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -328,13 +328,14 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3345c7553edc..7adc8a327e03 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -453,14 +453,15 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a17da8b57fc6..cefae2350da5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -118,6 +118,16 @@ static void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); } +/* + * Mask used when checking the page offset value passed in via system + * calls. This value will be converted to a loff_t which is signed. + * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the + * value. The extra bit (- 1 in the shift value) is to take the sign + * bit into account. + */ +#define PGOFF_LOFFT_MAX \ + (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) + static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); @@ -136,17 +146,31 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + /* + * page based offset in vm_pgoff could be sufficiently large to + * overflow a loff_t when converted to byte offset. This can + * only happen on architectures where sizeof(loff_t) == + * sizeof(unsigned long). So, only check in those instances. + */ + if (sizeof(unsigned long) == sizeof(loff_t)) { + if (vma->vm_pgoff & PGOFF_LOFFT_MAX) + return -EINVAL; + } + + /* must be huge page aligned */ if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); + len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + /* check for overflow */ + if (len < vma_len) + return -EINVAL; mutex_lock(&inode->i_mutex); file_accessed(file); ret = -ENOMEM; - len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, @@ -155,7 +179,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; if (vma->vm_flags & VM_WRITE && inode->i_size < len) - inode->i_size = len; + i_size_write(inode, len); out: mutex_unlock(&inode->i_mutex); @@ -845,6 +869,18 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, rc = migrate_huge_page_move_mapping(mapping, newpage, page); if (rc != MIGRATEPAGE_SUCCESS) return rc; + + /* + * page_private is subpool pointer in hugetlb pages. Transfer to + * new page. PagePrivate is not associated with page_private for + * hugetlb pages and can not be set here as only page_huge_active + * pages can be migrated. + */ + if (page_private(page)) { + set_page_private(newpage, page_private(page)); + set_page_private(page, 0); + } + migrate_page_copy(newpage, page); return MIGRATEPAGE_SUCCESS; diff --git a/fs/inode.c b/fs/inode.c index 48185da625ce..9f98d1d0769d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -632,6 +632,7 @@ again: dispose_list(&dispose); } +EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock diff --git a/fs/internal.h b/fs/internal.h index 6387b35a1c0d..71cc026dac30 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -123,7 +123,6 @@ extern void inode_add_lru(struct inode *inode); extern void inode_io_list_del(struct inode *inode); extern long get_nr_dirty_inodes(void); -extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index bce343febb9e..c34433432d47 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1215,11 +1215,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; char *committed_data = NULL; - JBUFFER_TRACE(jh, "entry"); if (jbd2_write_access_granted(handle, bh, true)) return 0; jh = jbd2_journal_add_journal_head(bh); + JBUFFER_TRACE(jh, "entry"); + /* * Do this first --- it can drop the journal lock, so we want to * make sure that obtaining the committed_data is done @@ -1336,15 +1337,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (is_handle_aborted(handle)) return -EROFS; - if (!buffer_jbd(bh)) { - ret = -EUCLEAN; - goto out; - } + if (!buffer_jbd(bh)) + return -EUCLEAN; + /* * We don't grab jh reference here since the buffer must be part * of the running transaction. */ jh = bh2jh(bh); + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); + /* * This and the following assertions are unreliable since we may see jh * in inconsistent state unless we grab bh_state lock. But this is @@ -1378,9 +1381,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) } journal = transaction->t_journal; - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); - jbd_lock_bh_state(bh); if (jh->b_modified == 0) { @@ -1578,14 +1578,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) /* However, if the buffer is still owned by a prior * (committing) transaction, we can't drop it yet... */ JBUFFER_TRACE(jh, "belongs to older transaction"); - /* ... but we CAN drop it from the new transaction if we - * have also modified it since the original commit. */ + /* ... but we CAN drop it from the new transaction through + * marking the buffer as freed and set j_next_transaction to + * the new transaction, so that not only the commit code + * knows it should clear dirty bits when it is done with the + * buffer, but also the buffer can be checkpointed only + * after the new transaction commits. */ - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == transaction); + set_buffer_freed(bh); + + if (!jh->b_next_transaction) { spin_lock(&journal->j_list_lock); - jh->b_next_transaction = NULL; + jh->b_next_transaction = transaction; spin_unlock(&journal->j_list_lock); + } else { + J_ASSERT(jh->b_next_transaction == transaction); /* * only drop a reference if this transaction modified diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 1544f530ccd0..023e7f32ee1b 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); #ifdef CONFIG_JFFS2_FS_WRITEBUFFER - cancel_delayed_work_sync(&c->wbuf_dwork); + if (jffs2_is_writebuffered(c)) + cancel_delayed_work_sync(&c->wbuf_dwork); #endif mutex_lock(&c->alloc_sem); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 0a3f9b594602..37779ed3f790 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -233,7 +233,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) len = strlen(server->nls_vol->charset); if (len > NCP_IOCSNAME_LEN) len = NCP_IOCSNAME_LEN; - strncpy(user.codepage, server->nls_vol->charset, len); + strscpy(user.codepage, server->nls_vol->charset, NCP_IOCSNAME_LEN); user.codepage[len] = 0; } @@ -243,7 +243,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) len = strlen(server->nls_io->charset); if (len > NCP_IOCSNAME_LEN) len = NCP_IOCSNAME_LEN; - strncpy(user.iocharset, server->nls_io->charset, len); + strscpy(user.iocharset, server->nls_io->charset, NCP_IOCSNAME_LEN); user.iocharset[len] = 0; } mutex_unlock(&server->root_setup_lock); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 211440722e24..88cb8e0d6014 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -670,6 +670,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) req = nfs_list_entry(reqs.next); nfs_direct_setup_mirroring(dreq, &desc, req); + if (desc.pg_error < 0) { + list_splice_init(&reqs, &failed); + goto out_failed; + } list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { @@ -677,13 +681,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_list_add_request(req, &failed); spin_lock(cinfo.lock); dreq->flags = 0; - dreq->error = -EIO; + if (desc.pg_error < 0) + dreq->error = desc.pg_error; + else + dreq->error = -EIO; spin_unlock(cinfo.lock); } nfs_release_request(req); } nfs_pageio_complete(&desc); +out_failed: while (!list_empty(&failed)) { req = nfs_list_entry(failed.next); nfs_list_remove_request(req); @@ -898,6 +906,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, } nfs_direct_setup_mirroring(dreq, &desc, req); + if (desc.pg_error < 0) { + nfs_free_request(req); + result = desc.pg_error; + break; + } nfs_lock_request(req); req->wb_index = pos >> PAGE_SHIFT; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index fd8da630fd22..8e268965c96d 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -882,13 +882,19 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_read_mds(pgio); @@ -901,13 +907,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } + /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c8e90152b61b..6506775575aa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -786,13 +786,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, int ds_idx; /* Use full layout for now */ - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) goto out_mds; @@ -826,13 +832,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, int i; int status; - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; @@ -868,18 +880,25 @@ static unsigned int ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + goto out; + } + } if (pgio->pg_lseg) return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); /* no lseg means that pnfs is not in use, so no mirroring here */ nfs_pageio_reset_write_mds(pgio); +out: return 1; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4bdc2fc86280..8a2077408ab0 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -872,6 +872,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + if (pgio->pg_error < 0) + return pgio->pg_error; + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) return -EINVAL; @@ -980,6 +983,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); + if (desc->pg_error < 0) + return 0; mirror->pg_base = req->wb_pgbase; } if (!nfs_can_coalesce_requests(prev, req, desc)) @@ -1102,7 +1107,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) struct nfs_page *req; req = list_first_entry(&head, struct nfs_page, wb_list); - nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; if (desc->pg_error < 0) { @@ -1145,6 +1149,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, bytes = req->wb_bytes; nfs_pageio_setup_mirroring(desc, req); + if (desc->pg_error < 0) + return 0; for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { @@ -1196,7 +1202,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); - if (!mirror->pg_recoalesce) + if (desc->pg_error < 0 || !mirror->pg_recoalesce) break; if (!nfs_do_recoalesce(desc)) break; @@ -1230,7 +1236,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, nfs_pageio_complete(desc); if (!list_empty(&failed)) { list_move(&failed, &hdr->pages); - return -EIO; + return desc->pg_error < 0 ? desc->pg_error : -EIO; } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c8e75e5e6a67..d34fb0feb5c2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -909,14 +909,15 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { - case -ENOMEM: case -ERESTARTSYS: + case -EIO: + case -ENOSPC: + case -EROFS: + case -E2BIG: break; default: - /* remember that LAYOUTGET failed and suspend trying */ - pnfs_layout_io_set_failed(lo, range->iomode); + return NULL; } - return NULL; } else pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(range->iomode)); @@ -1625,7 +1626,7 @@ out: "(%s, offset: %llu, length: %llu)\n", __func__, ino->i_sb->s_id, (unsigned long long)NFS_FILEID(ino), - lseg == NULL ? "not found" : "found", + IS_ERR_OR_NULL(lseg) ? "not found" : "found", iomode==IOMODE_RW ? "read/write" : "read-only", (unsigned long long)pos, (unsigned long long)count); @@ -1804,6 +1805,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r rd_size, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) @@ -1816,13 +1822,19 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - if (pgio->pg_lseg == NULL) + if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), wb_size, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_write_mds(pgio); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a5e33f33b5c..0bb580174cb3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -115,7 +115,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, pgm = &pgio.pg_mirrors[0]; NFS_I(inode)->read_io += pgm->pg_bytes_written; - return 0; + return pgio.pg_error < 0 ? pgio.pg_error : 0; } static void nfs_readpage_release(struct nfs_page *req) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 62f358f67764..9b42139a479b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1877,6 +1877,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end; + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); @@ -2376,8 +2381,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (b->auth_info.flavor_len > 0 && - clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; Ebusy: diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7b755b7f785c..91146f025769 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -430,8 +430,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, &resp->common, nfs3svc_encode_entry); memcpy(resp->verf, argp->verf, 8); resp->count = resp->buffer - argp->buffer; - if (resp->offset) - xdr_encode_hyper(resp->offset, argp->cookie); + if (resp->offset) { + loff_t offset = argp->cookie; + + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + resp->offset = NULL; + } RETURN_STATUS(nfserr); } @@ -499,6 +510,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, } else { xdr_encode_hyper(resp->offset, offset); } + resp->offset = NULL; } RETURN_STATUS(nfserr); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7162ab7bc093..d4fa7fbc37dc 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -898,6 +898,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } else { xdr_encode_hyper(cd->offset, offset64); } + cd->offset = NULL; } /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9690cb4dd588..0cd57db5c5af 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1106,6 +1106,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case 'Y': case 'y': case '1': + if (!nn->nfsd_serv) + return -EBUSY; nfsd4_end_grace(nn); break; default: diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 272269f1c310..9ee8bcfbf00f 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -146,7 +146,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, BUG(); } - clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); @@ -300,7 +299,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, continue; } - clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ if (validate) set_buffer_needs_validate(bh); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 3c45a9301a09..a87a08e1bfab 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -621,13 +621,15 @@ static void o2nm_node_group_drop_item(struct config_group *group, struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent); - o2net_disconnect_node(node); + if (cluster->cl_nodes[node->nd_num] == node) { + o2net_disconnect_node(node); - if (cluster->cl_has_local && - (cluster->cl_local_node == node->nd_num)) { - cluster->cl_has_local = 0; - cluster->cl_local_node = O2NM_INVALID_NODE_NUM; - o2net_stop_listening(node); + if (cluster->cl_has_local && + (cluster->cl_local_node == node->nd_num)) { + cluster->cl_has_local = 0; + cluster->cl_local_node = O2NM_INVALID_NODE_NUM; + o2net_stop_listening(node); + } } /* XXX call into net to stop this node from trading messages */ diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 827fc9809bc2..3494e220b510 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -125,10 +125,10 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { - iput(inode); trace_ocfs2_get_dentry_generation((unsigned long long)blkno, handle->ih_generation, inode->i_generation); + iput(inode); result = ERR_PTR(-ESTALE); goto bail; } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0a4457fb0711..85111d740c9d 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total - || la->la_bm_off) - mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" + || la->la_bm_off) { + mlog(ML_ERROR, "inconsistent detected, clean journal with" + " unrecovered local alloc, please run fsck.ocfs2!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); + status = -EINVAL; + goto bail; + } + osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 124471d26a73..c1a83c58456e 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -156,18 +156,14 @@ out: } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); @@ -260,10 +249,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -286,6 +275,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -606,9 +610,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 64c5386d0c1b..d49ac2ae7099 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -357,7 +357,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } out_unlock: unlock_rename(workdir, upperdir); - revert_creds(old_cred); + ovl_revert_creds(old_cred); if (link) free_page((unsigned long) link); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f8aa54272121..d7d880f9495b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -408,13 +408,22 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, err = ovl_create_upper(dentry, inode, &stat, link, hardlink); } else { const struct cred *old_cred; + struct cred *override_cred; old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_create_over_whiteout(dentry, inode, &stat, link, - hardlink); + err = -ENOMEM; + override_cred = prepare_creds(); + if (override_cred) { + override_cred->fsuid = old_cred->fsuid; + override_cred->fsgid = old_cred->fsgid; + put_cred(override_creds(override_cred)); + put_cred(override_cred); - revert_creds(old_cred); + err = ovl_create_over_whiteout(dentry, inode, &stat, + link, hardlink); + } + ovl_revert_creds(old_cred); } if (!err) @@ -648,7 +657,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) err = ovl_remove_and_whiteout(dentry, is_dir); - revert_creds(old_cred); + ovl_revert_creds(old_cred); } out_drop_write: ovl_drop_write(dentry); @@ -887,8 +896,7 @@ out_dput_old: out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: - if (old_opaque || new_opaque) - revert_creds(old_cred); + ovl_revert_creds(old_cred); out_drop_write: ovl_drop_write(old); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 27a42975d7cd..0723b8f97651 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -151,6 +151,7 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); bool ovl_is_whiteout(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); +void ovl_revert_creds(const struct cred *oldcred); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index da999e73c97a..9ff8f8192bf1 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -223,7 +223,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) } mutex_unlock(&dir->d_inode->i_mutex); } - revert_creds(old_cred); + ovl_revert_creds(old_cred); return err; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fa20c95bd456..48f7c1ab94cd 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -31,6 +31,7 @@ struct ovl_config { char *lowerdir; char *upperdir; char *workdir; + bool override_creds; }; /* private information held for overlayfs's superblock */ @@ -252,9 +253,17 @@ const struct cred *ovl_override_creds(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; + if (!ofs->config.override_creds) + return NULL; return override_creds(ofs->creator_cred); } +void ovl_revert_creds(const struct cred *old_cred) +{ + if (old_cred) + revert_creds(old_cred); +} + static bool ovl_is_opaquedir(struct dentry *dentry) { int res; @@ -626,6 +635,11 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) return err; } +static bool __read_mostly ovl_override_creds_def = true; +module_param_named(override_creds, ovl_override_creds_def, bool, 0644); +MODULE_PARM_DESC(ovl_override_creds_def, + "Use mounter's credentials for accesses"); + /** * ovl_show_options * @@ -642,6 +656,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_show_option(m, "upperdir", ufs->config.upperdir); seq_show_option(m, "workdir", ufs->config.workdir); } + if (ufs->config.override_creds != ovl_override_creds_def) + seq_show_option(m, "override_creds", + ufs->config.override_creds ? "on" : "off"); return 0; } @@ -666,6 +683,8 @@ enum { OPT_LOWERDIR, OPT_UPPERDIR, OPT_WORKDIR, + OPT_OVERRIDE_CREDS_ON, + OPT_OVERRIDE_CREDS_OFF, OPT_ERR, }; @@ -673,6 +692,8 @@ static const match_table_t ovl_tokens = { {OPT_LOWERDIR, "lowerdir=%s"}, {OPT_UPPERDIR, "upperdir=%s"}, {OPT_WORKDIR, "workdir=%s"}, + {OPT_OVERRIDE_CREDS_ON, "override_creds=on"}, + {OPT_OVERRIDE_CREDS_OFF, "override_creds=off"}, {OPT_ERR, NULL} }; @@ -703,6 +724,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; + config->override_creds = ovl_override_creds_def; while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -733,6 +755,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) return -ENOMEM; break; + case OPT_OVERRIDE_CREDS_ON: + config->override_creds = true; + break; + + case OPT_OVERRIDE_CREDS_OFF: + config->override_creds = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; diff --git a/fs/pnode.c b/fs/pnode.c index ddb846f878b8..35154a5c9392 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -610,36 +610,18 @@ int propagate_umount(struct list_head *list) return 0; } -/* - * Iterates over all slaves, and slaves of slaves. - */ -static struct mount *next_descendent(struct mount *root, struct mount *cur) -{ - if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list)) - return first_slave(cur); - do { - struct mount *master = cur->mnt_master; - - if (!master || cur->mnt_slave.next != &master->mnt_slave_list) { - struct mount *next = next_slave(cur); - - return (next == root) ? NULL : next; - } - cur = master; - } while (cur != root); - return NULL; -} - void propagate_remount(struct mount *mnt) { - struct mount *m = mnt; + struct mount *parent = mnt->mnt_parent; + struct mount *p = mnt, *m; struct super_block *sb = mnt->mnt.mnt_sb; - if (sb->s_op->copy_mnt_data) { - m = next_descendent(mnt, m); - while (m) { + if (!sb->s_op->copy_mnt_data) + return; + for (p = propagation_next(parent, parent); p; + p = propagation_next(p, parent)) { + m = __lookup_mnt(&p->mnt, mnt->mnt_mountpoint); + if (m) sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data); - m = next_descendent(mnt, m); - } } } diff --git a/fs/proc/array.c b/fs/proc/array.c index cb71cbae606d..60cbaa821164 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -333,7 +333,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); #endif - seq_printf(m, "\nSpeculation_Store_Bypass:\t"); + seq_printf(m, "Speculation_Store_Bypass:\t"); switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { case -EINVAL: seq_printf(m, "unknown"); diff --git a/fs/proc/base.c b/fs/proc/base.c index d780651a760c..b31c27d304eb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -255,7 +255,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -273,7 +273,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -308,7 +308,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -357,7 +357,7 @@ skip_argv: bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -869,6 +869,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; ssize_t copied; char *page; + unsigned int flags; if (!mm) return 0; @@ -881,6 +882,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ + flags = FOLL_FORCE; + if (write) + flags |= FOLL_WRITE; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -889,7 +895,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, break; } - this_len = access_remote_vm(mm, addr, page, this_len, write); + this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; @@ -1001,8 +1007,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), - page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5e1054f028af..c7e32a891502 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1550,7 +1550,8 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a83798cc448b..611bf28851c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -141,7 +141,7 @@ static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) struct page *page; pages_pinned = get_user_pages(current, mm, page_start_vaddr, - 1, 0, 0, &page, NULL); + 1, 0, &page, NULL); if (pages_pinned < 1) { seq_puts(m, "]"); return; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 97526024c962..328e70ab7871 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -392,8 +392,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); + psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, + s, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index ecdb3baa1283..11e558efd61e 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -488,6 +488,11 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, sig ^= PERSISTENT_RAM_SIG; if (prz->buffer->sig == sig) { + if (buffer_size(prz) == 0) { + pr_debug("found existing empty buffer\n"); + return 0; + } + if (buffer_size(prz) > prz->buffer_size || buffer_start(prz) > buffer_size(prz)) pr_info("found existing invalid buffer, size %zu, start %zu\n", diff --git a/fs/read_write.c b/fs/read_write.c index bfd1a5dddf6e..16e554ba885d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -363,8 +363,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) iter->type |= WRITE; ret = file->f_op->write_iter(&kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); - if (ret > 0) + if (ret > 0) { *ppos = kiocb.ki_pos; + fsnotify_modify(file); + } return ret; } EXPORT_SYMBOL(vfs_iter_write); diff --git a/fs/super.c b/fs/super.c index 652c9e6b6b92..eb27955a229a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -118,13 +118,23 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); /* - * Don't call trylock_super as it is a potential - * scalability bottleneck. The counts could get updated - * between super_cache_count and super_cache_scan anyway. - * Call to super_cache_count with shrinker_rwsem held - * ensures the safety of call to list_lru_shrink_count() and - * s_op->nr_cached_objects(). + * We don't call trylock_super() here as it is a scalability bottleneck, + * so we're exposed to partial setup state. The shrinker rwsem does not + * protect filesystem operations backing list_lru_shrink_count() or + * s_op->nr_cached_objects(). Counts can change between + * super_cache_count and super_cache_scan, so we really don't need locks + * here. + * + * However, if we are currently mounting the superblock, the underlying + * filesystem might be in a state of partial construction and hence it + * is dangerous to access it. trylock_super() uses a MS_BORN check to + * avoid this situation, so do the same here. The memory barrier is + * matched with the one in mount_fs() as we don't hold locks here. */ + if (!(sb->s_flags & MS_BORN)) + return 0; + smp_rmb(); + if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); @@ -1151,6 +1161,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsm sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); + + /* + * Write barrier is for super_cache_count(). We place it before setting + * MS_BORN as the data dependency between the two functions is the + * superblock structure contents that we just set up, not the MS_BORN + * flag. + */ + smp_wmb(); sb->s_flags |= MS_BORN; error = security_sb_kern_mount(sb, flags, secdata); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 02fa1dcc5969..29f5b2e589a1 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait) } } brelse(bh); - return 0; + return err; } int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 0e659d9c69a1..613193c6bb42 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1364,6 +1364,12 @@ reread: iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) & ICBTAG_FLAG_AD_MASK; + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT && + iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG && + iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + ret = -EIO; + goto out; + } iinfo->i_unique = 0; iinfo->i_lenEAttr = 0; iinfo->i_lenExtents = 0; diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 42b8c57795cb..c6ce7503a329 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode) epos.block = eloc; epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, &eloc, 0)); + /* Error reading indirect block? */ + if (!epos.bh) + return; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d859d8bd1f96..e7cc0d860499 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -792,6 +792,18 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, if (cur->vm_ops) goto out_unlock; + /* + * UFFDIO_COPY will fill file holes even without + * PROT_WRITE. This check enforces that if this is a + * MAP_SHARED, the process has write permission to the backing + * file. If VM_MAYWRITE is set it also enforces that on a + * MAP_SHARED vma: there is no F_WRITE_SEAL and no further + * F_WRITE_SEAL can be taken until the vma is destroyed. + */ + ret = -EPERM; + if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) + goto out_unlock; + /* * Check that this vma isn't already owned by a * different userfaultfd. We can't allow more than one @@ -817,6 +829,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, BUG_ON(vma->vm_ops); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -953,6 +966,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(vma->vm_ops); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index fb9636cc927c..5d8d12746e6e 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -528,7 +528,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) if (args->flags & ATTR_CREATE) return retval; retval = xfs_attr_shortform_remove(args); - ASSERT(retval == 0); + if (retval) + return retval; + /* + * Since we have removed the old attr, clear ATTR_REPLACE so + * that the leaf format add routine won't trip over the attr + * not being around. + */ + args->flags &= ~ATTR_REPLACE; } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || diff --git a/include/keys/user-type.h b/include/keys/user-type.h index c56fef40f53e..5d744ec8f644 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -31,7 +31,7 @@ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ - char data[0]; /* actual data */ + char data[0] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 73fd8b7e9534..af43ed404ff4 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -150,19 +150,29 @@ extern int sysctl_aarp_retransmit_limit; extern int sysctl_aarp_resolve_time; #ifdef CONFIG_SYSCTL -extern void atalk_register_sysctl(void); +extern int atalk_register_sysctl(void); extern void atalk_unregister_sysctl(void); #else -#define atalk_register_sysctl() do { } while(0) -#define atalk_unregister_sysctl() do { } while(0) +static inline int atalk_register_sysctl(void) +{ + return 0; +} +static inline void atalk_unregister_sysctl(void) +{ +} #endif #ifdef CONFIG_PROC_FS extern int atalk_proc_init(void); extern void atalk_proc_exit(void); #else -#define atalk_proc_init() ({ 0; }) -#define atalk_proc_exit() do { } while(0) +static inline int atalk_proc_init(void) +{ + return 0; +} +static inline void atalk_proc_exit(void) +{ +} #endif /* CONFIG_PROC_FS */ #endif /* __LINUX_ATALK_H__ */ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a307c37c2e6c..f2b0702df835 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -136,6 +136,7 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long io_pages; /* max allowed IO size */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ @@ -225,6 +226,14 @@ static inline void wb_get(struct bdi_writeback *wb) */ static inline void wb_put(struct bdi_writeback *wb) { + if (WARN_ON_ONCE(!wb->bdi)) { + /* + * A driver bug might cause a file to be removed before bdi was + * initialized. + */ + return; + } + if (wb != &wb->bdi->wb) percpu_ref_put(&wb->refcnt); } diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index fb790b8449c1..333e42cf08de 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -31,32 +31,32 @@ static inline u32 __bitrev32(u32 x) #define __constant_bitrev32(x) \ ({ \ - u32 __x = x; \ - __x = (__x >> 16) | (__x << 16); \ - __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = (___x >> 16) | (___x << 16); \ + ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev16(x) \ ({ \ - u16 __x = x; \ - __x = (__x >> 8) | (__x << 8); \ - __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ - __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ - __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ - __x; \ + u16 ___x = x; \ + ___x = (___x >> 8) | (___x << 8); \ + ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \ + ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \ + ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \ + ___x; \ }) #define __constant_bitrev8(x) \ ({ \ - u8 __x = x; \ - __x = (__x >> 4) | (__x << 4); \ - __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ - __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ - __x; \ + u8 ___x = x; \ + ___x = (___x >> 4) | (___x << 4); \ + ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \ + ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \ + ___x; \ }) #define bitrev32(x) \ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 8a4b9e22d616..f76c3440289b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -228,20 +228,12 @@ __ATTR(_name, _perm, show_##_name, NULL) static struct freq_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) -struct global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); -}; - #define define_one_global_ro(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) #define define_one_global_rw(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0aa9a374cc46..9df992b818a4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,8 +161,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); - void (*d_canonical_path)(const struct path *, struct path *); struct dentry *(*d_real)(struct dentry *, struct inode *); + void (*d_canonical_path)(const struct path *, struct path *); } ____cacheline_aligned; /* diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index f3f87db34429..4a08fef28acc 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -606,7 +606,7 @@ extern struct ratelimit_state dm_ratelimit_state; */ #define dm_target_offset(ti, sector) ((sector) - (ti)->begin) -static inline sector_t to_sector(unsigned long n) +static inline sector_t to_sector(unsigned long long n) { return (n >> SECTOR_SHIFT); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 162f83358abb..b19433738690 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 @@ -186,7 +187,7 @@ struct f2fs_orphan_block { struct f2fs_extent { __le32 fofs; /* start file offset of the extent */ __le32 blk; /* start block address of the extent */ - __le32 len; /* lengh of the extent */ + __le32 len; /* length of the extent */ } __packed; #define F2FS_NAME_LEN 255 @@ -284,7 +285,7 @@ enum { struct node_footer { __le32 nid; /* node id */ - __le32 ino; /* inode nunmber */ + __le32 ino; /* inode number */ __le32 flag; /* include cold/fsync/dentry marks and offset */ __le64 cp_ver; /* checkpoint version */ __le32 next_blkaddr; /* next node page block address */ @@ -489,12 +490,12 @@ typedef __le32 f2fs_hash_t; /* * space utilization of regular dentry and inline dentry (w/o extra reservation) - * regular dentry inline dentry - * bitmap 1 * 27 = 27 1 * 23 = 23 - * reserved 1 * 3 = 3 1 * 7 = 7 - * dentry 11 * 214 = 2354 11 * 182 = 2002 - * filename 8 * 214 = 1712 8 * 182 = 1456 - * total 4096 3488 + * regular dentry inline dentry (def) inline dentry (min) + * bitmap 1 * 27 = 27 1 * 23 = 23 1 * 1 = 1 + * reserved 1 * 3 = 3 1 * 7 = 7 1 * 1 = 1 + * dentry 11 * 214 = 2354 11 * 182 = 2002 11 * 2 = 22 + * filename 8 * 214 = 1712 8 * 182 = 1456 8 * 2 = 16 + * total 4096 3488 40 * * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. @@ -506,12 +507,13 @@ typedef __le32 f2fs_hash_t; #define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) +#define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ /* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ struct f2fs_dir_entry { __le32 hash_code; /* hash code of file name */ __le32 ino; /* inode number */ - __le16 name_len; /* lengh of file name */ + __le16 name_len; /* length of file name */ __u8 file_type; /* file type */ } __packed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8544edd381a4..d3e971c185e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2671,6 +2671,7 @@ static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern unsigned int get_next_ino(void); +extern void evict_inodes(struct super_block *sb); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index eecd19b37001..250e9be65e74 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -185,6 +185,7 @@ static inline void ct_assert_unique_operations(void) { switch (0) { #include GENL_MAGIC_INCLUDE_FILE + case 0: ; } } @@ -203,6 +204,7 @@ static inline void ct_assert_unique_top_level_attributes(void) { switch (0) { #include GENL_MAGIC_INCLUDE_FILE + case 0: ; } } @@ -212,7 +214,8 @@ static inline void ct_assert_unique_top_level_attributes(void) static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ { \ switch (0) { \ - s_fields \ + s_fields \ + case 0: \ ; \ } \ } diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 519b6e2d769e..661e5c2a8e2a 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -16,6 +16,10 @@ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } +#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] __read_mostly = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e6284591599e..5957c6a3fd7f 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -113,6 +113,23 @@ extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); +/** + * kobject_has_children - Returns whether a kobject has children. + * @kobj: the object to test + * + * This will return whether a kobject has other kobjects as children. + * + * It does NOT account for the presence of attribute files, only sub + * directories. It also assumes there is no concurrent addition or + * removal of such children, and thus relies on external locking. + */ +static inline bool kobject_has_children(struct kobject *kobj) +{ + WARN_ON_ONCE(atomic_read(&kobj->kref.refcount) == 0); + + return kobj->sd && kobj->sd->dir.subdirs; +} + struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e23392517db9..cb527c78de9f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -197,6 +197,7 @@ struct kretprobe_instance { struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; + void *fp; char data[0]; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index e3e9d6848fee..dd6462142334 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1207,7 +1207,7 @@ static inline int fixup_user_fault(struct task_struct *tsk, extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); + void *buf, int len, unsigned int gup_flags); long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -1215,19 +1215,17 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct **vmas, int *nonblocking); long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); + unsigned int gup_flags, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags); + struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); + struct page **pages, unsigned int gup_flags); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); @@ -1245,7 +1243,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - bool write, bool force, struct frame_vector *vec); + unsigned int gup_flags, struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); @@ -2138,6 +2136,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_COW 0x4000 /* internal GUP flag */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f0d87347df19..0508fcc67480 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,8 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include +#include typedef u64 netdev_features_t; @@ -125,8 +127,26 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define for_each_netdev_feature(mask_addr, bit) \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) +/* Finds the next feature with the highest number of the range of start till 0. + */ +static inline int find_next_netdev_feature(u64 feature, unsigned long start) +{ + /* like BITMAP_LAST_WORD_MASK() for u64 + * this sets the most significant 64 - start to 0. + */ + feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1)); + + return fls64(feature) - 1; +} + +/* This goes for the MSB to the LSB through the set feature bits, + * mask_addr should be a u64 and bit an int + */ +#define for_each_netdev_feature(mask_addr, bit) \ + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ + (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 907f3fd191ac..3e28a1a8d823 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -65,8 +65,8 @@ struct k_itimer { spinlock_t it_lock; clockid_t it_clock; /* which timer type */ timer_t it_id; /* timer id */ - int it_overrun; /* overrun on pending signal */ - int it_overrun_last; /* overrun on last delivered signal */ + s64 it_overrun; /* overrun on pending signal */ + s64 it_overrun_last; /* overrun on last delivered signal */ int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 0c94d17a4642..c22ce750df4e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -399,6 +399,42 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, first->pprev = &n->next; } +/** + * hlist_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_tail_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *i, *last = NULL; + + for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_add_head_rcu(n, h); + } +} + /** * hlist_add_before_rcu * @n: the new element to add to the hash list. diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e50b31d18462..e97cdfd6cba9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -133,23 +133,23 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @nelems: Number of elements in table * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table */ struct rhashtable { struct bucket_table __rcu *tbl; - atomic_t nelems; unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; spinlock_t lock; + atomic_t nelems; }; /** @@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *old_tbl); + struct bucket_table *old_tbl, + void **data); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); @@ -514,18 +515,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -537,8 +528,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -547,8 +536,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. */ @@ -557,13 +545,64 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ -static inline int __rhashtable_insert_fast( +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). + */ +static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -576,6 +615,7 @@ static inline int __rhashtable_insert_fast( spinlock_t *lock; unsigned int elasticity; unsigned int hash; + void *data = NULL; int err; restart: @@ -600,11 +640,14 @@ restart: new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); + tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); if (!IS_ERR_OR_NULL(tbl)) goto slow_path; err = PTR_ERR(tbl); + if (err == -EEXIST) + err = 0; + goto out; } @@ -618,25 +661,25 @@ slow_path: err = rhashtable_insert_rehash(ht, tbl); rcu_read_unlock(); if (err) - return err; + return ERR_PTR(err); goto restart; } - err = -EEXIST; + err = 0; elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { if (key && unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) + rhashtable_compare(&arg, rht_obj(ht, head))))) { + data = rht_obj(ht, head); goto out; + } if (!--elasticity) goto slow_path; } - err = 0; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); @@ -651,7 +694,7 @@ out: spin_unlock_bh(lock); rcu_read_unlock(); - return err; + return err ? ERR_PTR(err) : data; } /** @@ -674,7 +717,13 @@ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - return __rhashtable_insert_fast(ht, NULL, obj, params); + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -703,11 +752,15 @@ static inline int rhashtable_lookup_insert_fast( const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); + void *ret; BUG_ON(ht->p.obj_hashfn); - return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, - params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -735,6 +788,32 @@ static inline int rhashtable_lookup_insert_fast( static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) +{ + void *ret; + + BUG_ON(!ht->p.obj_hashfn || !key); + + ret = __rhashtable_insert_fast(ht, key, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * @data: pointer to element data already in hashes + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 19d0778ec382..121c8f99ecdd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -125,7 +125,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags); void ring_buffer_read_prepare_sync(void); void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a490dd718654..502787c29ce9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -556,9 +556,14 @@ struct sk_buff { struct skb_mstamp skb_mstamp; }; }; - struct rb_node rbnode; /* used in netem & tcp stack */ + struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ }; - struct sock *sk; + + union { + struct sock *sk; + int ip_defrag_offset; + }; + struct net_device *dev; /* @@ -2273,7 +2278,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -void skb_rbtree_purge(struct rb_root *root); +unsigned int skb_rbtree_purge(struct rb_root *root); void *netdev_alloc_frag(unsigned int fragsz); @@ -2791,6 +2796,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, return skb->data; } +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -2798,15 +2804,14 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - return __pskb_trim(skb, len); + return pskb_trim_rcsum_slow(skb, len); } #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) diff --git a/include/linux/string.h b/include/linux/string.h index c0c8878e321e..0af2f63824b5 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -26,7 +26,7 @@ extern char * strncpy(char *,const char *, __kernel_size_t); size_t strlcpy(char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRSCPY -ssize_t __must_check strscpy(char *, const char *, size_t); +ssize_t strscpy(char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); @@ -110,6 +110,9 @@ extern void * memscan(void *,int,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCMP extern int memcmp(const void *,const void *,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_BCMP +extern int bcmp(const void *,const void *,__kernel_size_t); +#endif #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cc0fc712bb82..a8ac3f25b4ec 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -290,9 +290,12 @@ struct svc_rqst { struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ spinlock_t rq_lock; /* per-request lock */ + struct net *rq_bc_net; /* pointer to backchannel's + * net namespace + */ }; -#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net) +#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* * Rigorous type checking on sockaddr type conversions diff --git a/include/linux/swap.h b/include/linux/swap.h index a3c65d09e6d9..555ff8d90dbc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -135,9 +135,9 @@ struct swap_extent { /* * Max bad pages in the new format.. */ -#define __swapoffset(x) ((unsigned long)&((union swap_header *)0)->x) #define MAX_SWAP_BADPAGES \ - ((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int)) + ((offsetof(union swap_header, magic.magic) - \ + offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index e5ce8ab0b8b0..26c155bb639b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -70,7 +70,7 @@ struct virtio_config_ops { int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char *names[]); + const char * const names[]); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -149,6 +149,19 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, return __virtio_test_bit(vdev, fbit); } +/** + * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * @vdev: the device + */ +static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +{ + /* + * Note the reverse polarity of the quirk feature (compared to most + * other features), this is for compatibility with legacy systems. + */ + return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + static inline struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *c, const char *n) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h new file mode 100644 index 000000000000..584f9a647ad4 --- /dev/null +++ b/include/linux/virtio_vsock.h @@ -0,0 +1,157 @@ +#ifndef _LINUX_VIRTIO_VSOCK_H +#define _LINUX_VIRTIO_VSOCK_H + +#include +#include +#include +#include + +#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 +#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) + +enum { + VSOCK_VQ_RX = 0, /* for host to guest data */ + VSOCK_VQ_TX = 1, /* for guest to host data */ + VSOCK_VQ_EVENT = 2, + VSOCK_VQ_MAX = 3, +}; + +/* Per-socket state (accessed via vsk->trans) */ +struct virtio_vsock_sock { + struct vsock_sock *vsk; + + /* Protected by lock_sock(sk_vsock(trans->vsk)) */ + u32 buf_size; + u32 buf_size_min; + u32 buf_size_max; + + spinlock_t tx_lock; + spinlock_t rx_lock; + + /* Protected by tx_lock */ + u32 tx_cnt; + u32 buf_alloc; + u32 peer_fwd_cnt; + u32 peer_buf_alloc; + + /* Protected by rx_lock */ + u32 fwd_cnt; + u32 rx_bytes; + struct list_head rx_queue; +}; + +struct virtio_vsock_pkt { + struct virtio_vsock_hdr hdr; + struct work_struct work; + struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; + void *buf; + u32 len; + u32 off; + bool reply; +}; + +struct virtio_vsock_pkt_info { + u32 remote_cid, remote_port; + struct vsock_sock *vsk; + struct msghdr *msg; + u32 pkt_len; + u16 type; + u16 op; + u32 flags; + bool reply; +}; + +struct virtio_transport { + /* This must be the first field */ + struct vsock_transport transport; + + /* Takes ownership of the packet */ + int (*send_pkt)(struct virtio_vsock_pkt *pkt); +}; + +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, + int type); +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk); +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now); +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_available_now); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); +bool virtio_transport_stream_is_active(struct vsock_sock *vsk); +bool virtio_transport_stream_allow(u32 cid, u32 port); +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr); +bool virtio_transport_dgram_allow(u32 cid, u32 port); + +int virtio_transport_connect(struct vsock_sock *vsk); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); + +void virtio_transport_release(struct vsock_sock *vsk); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len); +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t len); + +void virtio_transport_destruct(struct vsock_sock *vsk); + +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); +u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); +void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); + +#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f7a35fcaaaf6..f38fe1c00564 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -64,6 +64,8 @@ struct vsock_sock { bool rejected; struct delayed_work connect_work; struct delayed_work pending_work; + struct delayed_work close_work; + bool close_work_scheduled; u32 peer_shutdown; bool sent_request; bool ignore_connecting_rst; @@ -98,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); @@ -165,6 +170,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) } void vsock_core_exit(void); +/* The transport may downcast this to access transport-specific functions */ +const struct vsock_transport *vsock_core_get_transport(void); + /**** UTILS ****/ void vsock_release_pending(struct sock *pending); @@ -177,6 +185,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); +void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/ax25.h b/include/net/ax25.h index e602f8177ebf..b507ce2b1952 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -199,6 +199,18 @@ static inline void ax25_hold_route(ax25_route *ax25_rt) void __ax25_put_route(ax25_route *ax25_rt); +extern rwlock_t ax25_route_lock; + +static inline void ax25_route_lock_use(void) +{ + read_lock(&ax25_route_lock); +} + +static inline void ax25_route_lock_unuse(void) +{ + read_unlock(&ax25_route_lock); +} + static inline void ax25_put_route(ax25_route *ax25_rt) { if (atomic_dec_and_test(&ax25_rt->refcount)) diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index cf6c74550baa..cd856b7a11f5 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -19,22 +19,30 @@ static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *s struct gro_cell *cell; struct net_device *dev = skb->dev; + rcu_read_lock(); + if (unlikely(!(dev->flags & IFF_UP))) + goto drop; + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { netif_rx(skb); - return; + goto unlock; } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { +drop: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return; + goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + +unlock: + rcu_read_unlock(); } /* called under BH context */ @@ -84,6 +92,7 @@ static inline void gro_cells_destroy(struct gro_cells *gcells) for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + napi_disable(&cell->napi); netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } diff --git a/include/net/icmp.h b/include/net/icmp.h index 970028e13382..06ceb483475d 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,6 +22,7 @@ #include #include +#include struct icmp_err { int errno; @@ -39,7 +40,13 @@ struct net_proto_family; struct sk_buff; struct net; -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt); +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); +} + int icmp_rcv(struct sk_buff *skb); void icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index c26a6e4dc306..6260ec146142 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,13 +1,19 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ +#include + struct netns_frags { - /* Keep atomic mem on separate cachelines in structs that include it */ - atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ + long high_thresh; + long low_thresh; int timeout; - int high_thresh; - int low_thresh; + struct inet_frags *f; + + struct rhashtable rhashtable ____cacheline_aligned_in_smp; + + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_long_t mem ____cacheline_aligned_in_smp; }; /** @@ -23,74 +29,68 @@ enum { INET_FRAG_COMPLETE = BIT(2), }; +struct frag_v4_compare_key { + __be32 saddr; + __be32 daddr; + u32 user; + u32 vif; + __be16 id; + u16 protocol; +}; + +struct frag_v6_compare_key { + struct in6_addr saddr; + struct in6_addr daddr; + u32 user; + __be32 id; + u32 iif; +}; + /** * struct inet_frag_queue - fragment queue * - * @lock: spinlock protecting the queue + * @node: rhash node + * @key: keys identifying this frag. * @timer: queue expiration timer - * @list: hash bucket list + * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @fragments: received fragments head + * @rb_fragments: received fragments rb-tree root * @fragments_tail: received fragments tail + * @last_run_head: the head of the last "run". see ip_fragment.c * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to - * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) + * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { - spinlock_t lock; + struct rhash_head node; + union { + struct frag_v4_compare_key v4; + struct frag_v6_compare_key v6; + } key; struct timer_list timer; - struct hlist_node list; + spinlock_t lock; atomic_t refcnt; - struct sk_buff *fragments; + struct sk_buff *fragments; /* Used in IPv6. */ + struct rb_root rb_fragments; /* Used in IPv4. */ struct sk_buff *fragments_tail; + struct sk_buff *last_run_head; ktime_t stamp; int len; int meat; __u8 flags; u16 max_size; - struct netns_frags *net; - struct hlist_node list_evictor; -}; - -#define INETFRAGS_HASHSZ 1024 - -/* averaged: - * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / - * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or - * struct frag_queue)) - */ -#define INETFRAGS_MAXDEPTH 128 - -struct inet_frag_bucket { - struct hlist_head chain; - spinlock_t chain_lock; + struct netns_frags *net; + struct rcu_head rcu; }; struct inet_frags { - struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; - - struct work_struct frags_work; - unsigned int next_bucket; - unsigned long last_rebuild_jiffies; - bool rebuild; - - /* The first call to hashfn is responsible to initialize - * rnd. This is best done with net_get_random_once. - * - * rnd_seqlock is used to let hash insertion detect - * when it needs to re-lookup the hash chain to use. - */ - u32 rnd; - seqlock_t rnd_seqlock; int qsize; - unsigned int (*hashfn)(const struct inet_frag_queue *); - bool (*match)(const struct inet_frag_queue *q, - const void *arg); void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); @@ -98,56 +98,47 @@ struct inet_frags { void (*frag_expire)(unsigned long data); struct kmem_cache *frags_cachep; const char *frags_cache_name; + struct rhashtable_params rhash_params; }; int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline void inet_frags_init_net(struct netns_frags *nf) +static inline int inet_frags_init_net(struct netns_frags *nf) { - atomic_set(&nf->mem, 0); + atomic_long_set(&nf->mem, 0); + return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); } -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); +void inet_frags_exit_net(struct netns_frags *nf); -void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, unsigned int hash); +void inet_frag_kill(struct inet_frag_queue *q); +void inet_frag_destroy(struct inet_frag_queue *q); +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix); +/* Free all skbs in the queue; return the sum of their truesizes. */ +unsigned int inet_frag_rbtree_purge(struct rb_root *root); -static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) +static inline void inet_frag_put(struct inet_frag_queue *q) { if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f); -} - -static inline bool inet_frag_evicting(struct inet_frag_queue *q) -{ - return !hlist_unhashed(&q->list_evictor); + inet_frag_destroy(q); } /* Memory Tracking Functions. */ -static inline int frag_mem_limit(struct netns_frags *nf) +static inline long frag_mem_limit(const struct netns_frags *nf) { - return atomic_read(&nf->mem); + return atomic_long_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_sub(i, &nf->mem); + atomic_long_sub(val, &nf->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_add(i, &nf->mem); -} - -static inline int sum_frag_mem_limit(struct netns_frags *nf) -{ - return atomic_read(&nf->mem); + atomic_long_add(val, &nf->mem); } /* RFC 3168 support : diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 235c7811a86a..408d76f47bd2 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -40,6 +40,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + u32 n_redirects; unsigned long rate_last; union { struct list_head gc_list; diff --git a/include/net/ip.h b/include/net/ip.h index 10664a684acf..2a1b897f1e79 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -525,7 +525,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s return skb; } #endif -int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c @@ -548,6 +547,8 @@ static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) } void ip_options_fragment(struct sk_buff *skb); +int __ip_options_compile(struct net *net, struct ip_options *opt, + struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2a25b53cd427..f6ff83b2ac87 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -200,7 +200,7 @@ int fib_table_insert(struct fib_table *, struct fib_config *); int fib_table_delete(struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct fib_table *table); +int fib_table_flush(struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0e01d570fa22..c07cf9596b6f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -320,13 +320,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev) idev->cnf.accept_ra; } -#if IS_ENABLED(CONFIG_IPV6) -static inline int ip6_frag_mem(struct net *net) -{ - return sum_frag_mem_limit(&net->ipv6.frags); -} -#endif - #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ @@ -505,17 +498,8 @@ enum ip6_defrag_users { __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, }; -struct ip6_create_arg { - __be32 id; - u32 user; - const struct in6_addr *src; - const struct in6_addr *dst; - int iif; - u8 ecn; -}; - void ip6_frag_init(struct inet_frag_queue *q, const void *a); -bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); +extern const struct rhashtable_params ip6_rhash_params; /* * Equivalent of ipv4 struct ip @@ -523,19 +507,13 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); struct frag_queue { struct inet_frag_queue q; - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - int iif; unsigned int csum; __u16 nhoffset; u8 ecn; }; -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, - struct inet_frags *frags); +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); static inline bool ipv6_addr_any(const struct in6_addr *a) { diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 8b683841e574..f6017ddc4ded 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -448,6 +448,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; int hh_len; @@ -455,16 +456,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); - } else { - int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_MOD; - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } + } else { + hh_alen = HH_DATA_ALIGN(hh_len); + + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6965dfe7e88b..0daed810724d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -53,6 +53,7 @@ struct net { */ spinlock_t rules_mod_lock; + u32 hash_mix; atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index e8d1448425a7..b1d0d46344e2 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -42,7 +42,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) } struct net_device *setup_pre_routing(struct sk_buff *skb); -void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct net *net, struct sk_buff *skb); diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index 69a6715d9f3f..a347b2f9e748 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -1,21 +1,10 @@ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ -#include - -struct net; +#include static inline u32 net_hash_mix(const struct net *net) { -#ifdef CONFIG_NET_NS - /* - * shift this right to eliminate bits, that are - * always zeroed - */ - - return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT); -#else - return 0; -#endif + return net->hash_mix; } #endif diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b669fe6dbc3b..98f31c7ea23d 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -63,10 +63,11 @@ struct pnpipehdr { u8 state_after_reset; /* reset request */ u8 error_code; /* any response */ u8 pep_type; /* status indication */ - u8 data[1]; + u8 data0; /* anything else */ }; + u8 data[]; }; -#define other_pep_type data[1] +#define other_pep_type data[0] static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) { diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 4a5b9a306c69..803fc26ef0ba 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -60,7 +60,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); __le32 ret, old = sh->checksum; const struct skb_checksum_ops ops = { .update = sctp_csum_update, diff --git a/include/net/sock.h b/include/net/sock.h index 5b1a71559325..51d99569f4b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -299,6 +299,7 @@ struct cg_proto; * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_socket: Identd and reporting IO signals @@ -434,6 +435,9 @@ struct sock { long sk_sndtimeo; struct timer_list sk_timer; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u32 sk_tskey; struct socket *sk_socket; @@ -648,6 +652,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) hlist_add_head_rcu(&sk->sk_node, list); } +static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_tail_rcu(&sk->sk_node, list); +} + static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); @@ -2154,6 +2164,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2178,7 +2216,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2198,7 +2236,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); } void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); diff --git a/include/net/tcp.h b/include/net/tcp.h index c6a4c41674e7..5bd96455ddbd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1460,6 +1460,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); + inet_csk(sk)->icsk_backoff = 0; } static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index fa1d05512c09..85ff3181e6f1 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -178,7 +178,11 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + if (stream->direction == SND_COMPRESS_PLAYBACK) + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + else + stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + wake_up(&stream->runtime->sleep); } diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 700811929a2b..61a92c3ec111 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -100,7 +100,7 @@ struct snd_pcm_ops { #endif #define SNDRV_PCM_IOCTL1_RESET 0 -#define SNDRV_PCM_IOCTL1_INFO 1 +/* 1 is absent slot. */ #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 #define SNDRV_PCM_IOCTL1_GSTATE 3 #define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 594b4b29a224..7ef11b97cb2a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -223,6 +223,26 @@ TRACE_EVENT(ext4_drop_inode, (unsigned long) __entry->ino, __entry->drop) ); +TRACE_EVENT(ext4_nfs_commit_metadata, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + ), + + TP_printk("dev %d,%d ino %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino) +); + TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 0cdf6cc5c557..fe009522f4e1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -141,6 +141,17 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_SPEC_LOG_NUM, "log type is 2" }, \ { CP_RECOVER_DIR, "dir needs recovery" }) +#define show_shutdown_mode(type) \ + __print_symbolic(type, \ + { F2FS_GOING_DOWN_FULLSYNC, "full sync" }, \ + { F2FS_GOING_DOWN_METASYNC, "meta sync" }, \ + { F2FS_GOING_DOWN_NOSYNC, "no sync" }, \ + { F2FS_GOING_DOWN_METAFLUSH, "meta flush" }, \ + { F2FS_GOING_DOWN_NEED_FSCK, "need fsck" }) + +struct f2fs_sb_info; +struct f2fs_io_info; +struct extent_info; struct victim_sel_policy; struct f2fs_map_blocks; @@ -525,6 +536,9 @@ TRACE_EVENT(f2fs_map_blocks, __field(block_t, m_lblk) __field(block_t, m_pblk) __field(unsigned int, m_len) + __field(unsigned int, m_flags) + __field(int, m_seg_type) + __field(bool, m_may_create) __field(int, ret) ), @@ -534,15 +548,22 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_lblk = map->m_lblk; __entry->m_pblk = map->m_pblk; __entry->m_len = map->m_len; + __entry->m_flags = map->m_flags; + __entry->m_seg_type = map->m_seg_type; + __entry->m_may_create = map->m_may_create; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, flags = %u," + "seg_type = %d, may_create = %d, err = %d", show_dev_ino(__entry), (unsigned long long)__entry->m_lblk, (unsigned long long)__entry->m_pblk, (unsigned long long)__entry->m_len, + __entry->m_flags, + __entry->m_seg_type, + __entry->m_may_create, __entry->ret) ); @@ -1604,6 +1625,30 @@ DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, TP_ARGS(sb, type, count) ); +TRACE_EVENT(f2fs_shutdown, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret), + + TP_ARGS(sbi, mode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, mode) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->mode = mode; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), mode: %s, ret:%d", + show_dev(__entry->dev), + show_shutdown_mode(__entry->mode), + __entry->ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h new file mode 100644 index 000000000000..b7f1d6278280 --- /dev/null +++ b/include/trace/events/vsock_virtio_transport_common.h @@ -0,0 +1,144 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vsock + +#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \ + defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H + +#include + +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM); + +#define show_type(val) \ + __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }) + +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE); +TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST); + +#define show_op(val) \ + __print_symbolic(val, \ + { VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \ + { VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \ + { VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \ + { VIRTIO_VSOCK_OP_RST, "RST" }, \ + { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \ + { VIRTIO_VSOCK_OP_RW, "RW" }, \ + { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \ + { VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }) + +TRACE_EVENT(virtio_transport_alloc_pkt, + TP_PROTO( + __u32 src_cid, __u32 src_port, + __u32 dst_cid, __u32 dst_port, + __u32 len, + __u16 type, + __u16 op, + __u32 flags + ), + TP_ARGS( + src_cid, src_port, + dst_cid, dst_port, + len, + type, + op, + flags + ), + TP_STRUCT__entry( + __field(__u32, src_cid) + __field(__u32, src_port) + __field(__u32, dst_cid) + __field(__u32, dst_port) + __field(__u32, len) + __field(__u16, type) + __field(__u16, op) + __field(__u32, flags) + ), + TP_fast_assign( + __entry->src_cid = src_cid; + __entry->src_port = src_port; + __entry->dst_cid = dst_cid; + __entry->dst_port = dst_port; + __entry->len = len; + __entry->type = type; + __entry->op = op; + __entry->flags = flags; + ), + TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", + __entry->src_cid, __entry->src_port, + __entry->dst_cid, __entry->dst_port, + __entry->len, + show_type(__entry->type), + show_op(__entry->op), + __entry->flags) +); + +TRACE_EVENT(virtio_transport_recv_pkt, + TP_PROTO( + __u32 src_cid, __u32 src_port, + __u32 dst_cid, __u32 dst_port, + __u32 len, + __u16 type, + __u16 op, + __u32 flags, + __u32 buf_alloc, + __u32 fwd_cnt + ), + TP_ARGS( + src_cid, src_port, + dst_cid, dst_port, + len, + type, + op, + flags, + buf_alloc, + fwd_cnt + ), + TP_STRUCT__entry( + __field(__u32, src_cid) + __field(__u32, src_port) + __field(__u32, dst_cid) + __field(__u32, dst_port) + __field(__u32, len) + __field(__u16, type) + __field(__u16, op) + __field(__u32, flags) + __field(__u32, buf_alloc) + __field(__u32, fwd_cnt) + ), + TP_fast_assign( + __entry->src_cid = src_cid; + __entry->src_port = src_port; + __entry->dst_cid = dst_cid; + __entry->dst_port = dst_port; + __entry->len = len; + __entry->type = type; + __entry->op = op; + __entry->flags = flags; + __entry->buf_alloc = buf_alloc; + __entry->fwd_cnt = fwd_cnt; + ), + TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x " + "buf_alloc=%u fwd_cnt=%u", + __entry->src_cid, __entry->src_port, + __entry->dst_cid, __entry->dst_port, + __entry->len, + show_type(__entry->type), + show_op(__entry->op), + __entry->flags, + __entry->buf_alloc, + __entry->fwd_cnt) +); + +#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */ + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE vsock_virtio_transport_common + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 823d784a6244..d9c9b138a15e 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -450,6 +450,7 @@ header-y += virtio_ring.h header-y += virtio_rng.h header-y += virtio_scsi.h header-y += virtio_types.h +header-y += virtio_vsock.h header-y += vm_sockets.h header-y += vt.h header-y += wait.h diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index bd0da0e992b8..0631c500702c 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -87,6 +87,14 @@ enum flat_binder_object_flags { * scheduling policy from the caller (for synchronous transactions). */ FLAT_BINDER_FLAG_INHERIT_RT = 0x800, + + /** + * @FLAT_BINDER_FLAG_TXN_SECURITY_CTX: request security contexts + * + * Only when set, causes senders to include their security + * context + */ + FLAT_BINDER_FLAG_TXN_SECURITY_CTX = 0x1000, }; #ifdef BINDER_IPC_32BIT @@ -264,6 +272,7 @@ struct binder_node_info_for_ref { #define BINDER_VERSION _IOWR('b', 9, struct binder_version) #define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) #define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref) +#define BINDER_SET_CONTEXT_MGR_EXT _IOW('b', 13, struct flat_binder_object) /* * NOTE: Two special error codes you should check for when calling @@ -322,6 +331,11 @@ struct binder_transaction_data { } data; }; +struct binder_transaction_data_secctx { + struct binder_transaction_data transaction_data; + binder_uintptr_t secctx; +}; + struct binder_transaction_data_sg { struct binder_transaction_data transaction_data; binder_size_t buffers_size; @@ -357,6 +371,11 @@ enum binder_driver_return_protocol { BR_OK = _IO('r', 1), /* No parameters! */ + BR_TRANSACTION_SEC_CTX = _IOR('r', 2, + struct binder_transaction_data_secctx), + /* + * binder_transaction_data_secctx: the received command. + */ BR_TRANSACTION = _IOR('r', 2, struct binder_transaction_data), BR_REPLY = _IOR('r', 3, struct binder_transaction_data), /* diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 064d2026ab38..373afec2ed34 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -136,11 +136,18 @@ * This is an Ethernet frame header. */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 87cf351bab03..9e07bf4259e1 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -708,6 +708,15 @@ #define ABS_MISC 0x28 +/* + * 0x2e is reserved and should not be used in input drivers. + * It was used by HID as ABS_MISC+6 and userspace needs to detect if + * the next ABS_* event is correct or is just ABS_MISC + n. + * We define here ABS_RESERVED so userspace can rely on it and detect + * the situation described above. + */ +#define ABS_RESERVED 0x2e + #define ABS_MT_SLOT 0x2f /* MT slot being modified */ #define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ #define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 25a9ad8bcef1..9de808ebce05 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -55,6 +55,7 @@ enum IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */ IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */ IPSTATS_MIB_CEPKTS, /* InCEPkts */ + IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */ __IPSTATS_MIB_MAX }; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index ab3731917bac..56b7ab584cc0 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -47,6 +47,32 @@ struct vhost_vring_addr { __u64 log_guest_addr; }; +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ @@ -126,6 +152,12 @@ struct vhost_memory { #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) /* Set eventfd to signal an error */ #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +/* Set busy loop timeout (in us) */ +#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \ + struct vhost_vring_state) +/* Get busy loop timeout (in us) */ +#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ + struct vhost_vring_state) /* VHOST_NET specific defines */ @@ -140,6 +172,8 @@ struct vhost_memory { #define VHOST_F_LOG_ALL 26 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ #define VHOST_NET_F_VIRTIO_NET_HDR 27 +/* Vhost have device IOTLB */ +#define VHOST_F_DEVICE_IOTLB 63 /* VHOST_SCSI specific definitions */ @@ -169,4 +203,9 @@ struct vhost_scsi_target { #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) +/* VHOST_VSOCK specific defines */ + +#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) +#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) + #endif diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index c18264df9504..cf49c7e2cfdb 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -47,7 +47,7 @@ * transport being used (eg. virtio_ring), the rest are per-device feature * bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 33 +#define VIRTIO_TRANSPORT_F_END 34 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -61,4 +61,12 @@ /* v1.0 compliant. */ #define VIRTIO_F_VERSION_1 32 +/* + * If clear - device has the IOMMU bypass quirk feature. + * If set - use platform tools to detect the IOMMU. + * + * Note the reverse polarity (compared to most other features), + * this is for compatibility with legacy systems. + */ +#define VIRTIO_F_IOMMU_PLATFORM 33 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 77925f587b15..3228d582234a 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -41,5 +41,6 @@ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h new file mode 100644 index 000000000000..1d57ed3d84d2 --- /dev/null +++ b/include/uapi/linux/virtio_vsock.h @@ -0,0 +1,94 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H +#define _UAPI_LINUX_VIRTIO_VSOCK_H + +#include +#include +#include + +struct virtio_vsock_config { + __le64 guest_cid; +} __attribute__((packed)); + +enum virtio_vsock_event_id { + VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, +}; + +struct virtio_vsock_event { + __le32 id; +} __attribute__((packed)); + +struct virtio_vsock_hdr { + __le64 src_cid; + __le64 dst_cid; + __le32 src_port; + __le32 dst_port; + __le32 len; + __le16 type; /* enum virtio_vsock_type */ + __le16 op; /* enum virtio_vsock_op */ + __le32 flags; + __le32 buf_alloc; + __le32 fwd_cnt; +} __attribute__((packed)); + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_RST = 3, + VIRTIO_VSOCK_OP_SHUTDOWN = 4, + + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 5, + + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 35dfa9e9d69e..c43ca9857479 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -191,6 +191,7 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; @@ -569,10 +570,11 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct verifier_env *env, + struct verifier_state *state, int off, + int size, int value_regno, int insn_idx) { - int i; + int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -587,15 +589,37 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } /* save register state */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - state->regs[value_regno]; + state->spilled_regs[spi] = state->regs[value_regno]; - for (i = 0; i < BPF_REG_SIZE; i++) + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose("insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } } else { /* regular write of data into stack */ - state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + state->spilled_regs[spi] = (struct reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -696,7 +720,7 @@ static bool is_ctx_reg(struct verifier_env *env, int regno) * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { @@ -748,7 +772,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, verbose("attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); } else { err = check_stack_read(state, off, size, value_regno); } @@ -760,7 +785,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct verifier_env *env, int insn_idx, struct bpf_insn *insn) { struct reg_state *regs = env->cur_state.regs; int err; @@ -793,13 +818,13 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) } /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); if (err) return err; /* check whether atomic_add can write into the same memory */ - return check_mem_access(env, insn->dst_reg, insn->off, + return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); } @@ -1838,13 +1863,14 @@ static int do_check(struct verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn->src_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg); if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W) { + if (BPF_SIZE(insn->code) != BPF_W && + BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; continue; } @@ -1876,7 +1902,7 @@ static int do_check(struct verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn); + err = check_xadd(env, insn_idx, insn); if (err) return err; insn_idx++; @@ -1895,7 +1921,7 @@ static int do_check(struct verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg); if (err) @@ -1930,7 +1956,7 @@ static int do_check(struct verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); if (err) @@ -2220,13 +2246,43 @@ static int convert_ctx_accesses(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { u32 cnt; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; diff --git a/kernel/events/core.c b/kernel/events/core.c index 811ec37a9c2d..a23a1e816f21 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6128,6 +6128,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -6171,6 +6172,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d8fc54eef51c..26a4cba37128 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); + ret = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, &vma); if (ret <= 0) return ret; @@ -1699,7 +1699,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (likely(result == 0)) goto out; - result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + result = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; diff --git a/kernel/exit.c b/kernel/exit.c index e57bff761b88..6e8ab67325a7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -453,12 +453,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) return NULL; } -static struct task_struct *find_child_reaper(struct task_struct *father) +static struct task_struct *find_child_reaper(struct task_struct *father, + struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *p, *n; if (likely(reaper != father)) return reaper; @@ -474,6 +476,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) panic("Attempted to kill init! exitcode=0x%08x\n", father->signal->group_exit_code ?: father->exit_code); } + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); @@ -560,7 +568,7 @@ static void forget_original_parent(struct task_struct *father, exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ - reaper = find_child_reaper(father); + reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; diff --git a/kernel/fork.c b/kernel/fork.c index 5a90b0f6d668..c6e26e433445 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1440,8 +1440,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1601,6 +1599,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (retval) goto bad_fork_free_pid; + /* + * From this point on we must avoid any synchronous user-space + * communication until we take the tasklist-lock. In particular, we do + * not want user-space to be able to predict the process start-time by + * stalling fork(2) after we recorded the start_time but before it is + * visible to the system. + */ + + p->start_time = ktime_get_ns(); + p->real_start_time = ktime_get_boot_ns(); + /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! diff --git a/kernel/futex.c b/kernel/futex.c index aedb36c0fd92..39c2b3eda1b9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2923,10 +2923,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); - debug_rt_mutex_free_waiter(&rt_waiter); + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); spin_lock(q.lock_ptr); + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; + + debug_rt_mutex_free_waiter(&rt_waiter); /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. @@ -3064,6 +3067,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; + retry: if (get_user(uval, uaddr)) return -1; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index e0f90c2b57aa..cc05b97ba569 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -30,7 +30,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * is disabled during the critical section. It also controls the size of * the RCU grace period. So it needs to be upper-bound. */ -#define HUNG_TASK_BATCHING 1024 +#define HUNG_TASK_LOCK_BREAK (HZ / 10) /* * Zero means infinite timeout - no checking done: @@ -158,7 +158,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t) static void check_hung_uninterruptible_tasks(unsigned long timeout) { int max_count = sysctl_hung_task_check_count; - int batch_count = HUNG_TASK_BATCHING; + unsigned long last_break = jiffies; struct task_struct *g, *t; /* @@ -172,10 +172,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) for_each_process_thread(g, t) { if (!max_count--) goto unlock; - if (!--batch_count) { - batch_count = HUNG_TASK_BATCHING; + if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) { if (!rcu_lock_break(g, t)) goto unlock; + last_break = jiffies; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6a0cd0d585d2..7ebb4bfb4492 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1098,6 +1098,10 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) { data = data->parent_data; + + if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + if (data->chip->irq_set_wake) return data->chip->irq_set_wake(data, on); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d8daf6c55d2b..a53998cba804 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -668,7 +668,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; - int ret; BUG_ON(!kprobe_unused(ap)); /* @@ -682,9 +681,8 @@ static int reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - ret = kprobe_optready(ap); - if (ret) - return ret; + if (!kprobe_optready(ap)) + return -EINVAL; optimize_kprobe(ap); return 0; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b066724d7a5b..dd173df9ee5e 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1712,21 +1712,23 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) } /** - * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * rt_mutex_wait_proxy_lock() - Wait for lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have * been started. * @waiter: the pre-initialized rt_mutex_waiter * - * Complete the lock acquisition started our behalf by another thread. + * Wait for the the lock acquisition started on our behalf by + * rt_mutex_start_proxy_lock(). Upon failure, the caller must call + * rt_mutex_cleanup_proxy_lock(). * * Returns: * 0 - success * <0 - error, one of -EINTR, -ETIMEDOUT * - * Special API call for PI-futex requeue support + * Special API call for PI-futex support */ -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { @@ -1739,9 +1741,6 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - if (unlikely(ret)) - remove_waiter(lock, waiter); - /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. @@ -1752,3 +1751,42 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, return ret; } + +/** + * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition + * @lock: the rt_mutex we were woken on + * @waiter: the pre-initialized rt_mutex_waiter + * + * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * + * Unless we acquired the lock; we're still enqueued on the wait-list and can + * in fact still be granted ownership until we're removed. Therefore we can + * find we are in fact the owner and must disregard the + * rt_mutex_wait_proxy_lock() failure. + * + * Returns: + * true - did the cleanup, we done. + * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, + * caller should disregards its return value. + * + * Special API call for PI-futex support + */ +bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter) +{ + bool cleanup = false; + + raw_spin_lock_irq(&lock->wait_lock); + /* + * Unless we're the owner; we're still enqueued on the wait_list. + * So check if we became owner, if not, take us off the wait_list. + */ + if (rt_mutex_owner(lock) != current) { + remove_waiter(lock, waiter); + fixup_rt_mutex_waiters(lock); + cleanup = true; + } + raw_spin_unlock_irq(&lock->wait_lock); + + return cleanup; +} diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index e317e1cbb3eb..6f8f68edb700 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -106,9 +106,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter); +extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter); +extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, struct wake_q_head *wqh); diff --git a/kernel/memremap.c b/kernel/memremap.c index f719c925cb54..1be42f9b3e00 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -171,15 +171,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) struct page_map *page_map; int error, nid; - if (is_ram == REGION_MIXED) { - WARN_ONCE(1, "%s attempted on mixed region %pr\n", - __func__, res); + if (is_ram != REGION_DISJOINT) { + WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__, + is_ram == REGION_MIXED ? "mixed" : "ram", res); return ERR_PTR(-ENXIO); } - if (is_ram == REGION_INTERSECTS) - return __va(res->start); - page_map = devres_alloc_node(devm_memremap_pages_release, sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); if (!page_map) @@ -202,5 +199,5 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) devres_add(dev, page_map); return __va(res->start); } -EXPORT_SYMBOL(devm_memremap_pages); +EXPORT_SYMBOL_GPL(devm_memremap_pages); #endif /* CONFIG_ZONE_DEVICE */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8a62cbfe1f2f..082aedefe29c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1611,15 +1611,23 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* - * Awaken the grace-period kthread for the specified flavor of RCU. - * Don't do a self-awaken, and don't bother awakening when there is - * nothing for the grace-period kthread to do (as in several CPUs - * raced to awaken, and we lost), and finally don't try to awaken - * a kthread that has not yet been created. + * Awaken the grace-period kthread. Don't do a self-awaken (unless in + * an interrupt or softirq handler), and don't bother awakening when there + * is nothing for the grace-period kthread to do (as in several CPUs raced + * to awaken, and we lost), and finally don't try to awaken a kthread that + * has not yet been created. If all those checks are passed, track some + * debug information and awaken. + * + * So why do the self-wakeup when in an interrupt or softirq handler + * in the grace-period kthread's context? Because the kthread might have + * been interrupted just as it was going to sleep, and just after the final + * pre-sleep check of the awaken condition. In this case, a wakeup really + * is required, and is therefore supplied. */ static void rcu_gp_kthread_wake(struct rcu_state *rsp) { - if (current == rsp->gp_kthread || + if ((current == rsp->gp_kthread && + !in_interrupt() && !in_serving_softirq()) || !READ_ONCE(rsp->gp_flags) || !rsp->gp_kthread) return; @@ -3817,7 +3825,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) continue; rdp = per_cpu_ptr(rsp->rda, cpu); pr_cont(" %d-%c%c%c", cpu, - "O."[cpu_online(cpu)], + "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rnp->expmaskinit)], "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eb952ebf53b1..84994a9adae5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4478,12 +4478,15 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +extern const u64 max_cfs_quota_period; + static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = container_of(timer, struct cfs_bandwidth, period_timer); int overrun; int idle = 0; + int count = 0; raw_spin_lock(&cfs_b->lock); for (;;) { @@ -4491,6 +4494,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (!overrun) break; + if (++count > 3) { + u64 new, old = ktime_to_ns(cfs_b->period); + + new = (old * 147) / 128; /* ~115% */ + new = min(new, max_cfs_quota_period); + + cfs_b->period = ns_to_ktime(new); + + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ + cfs_b->quota *= new; + cfs_b->quota = div64_u64(cfs_b->quota, old); + + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + + /* reset count so we don't come right back in here */ + count = 0; + } + idle = do_sched_cfs_period_timer(cfs_b, overrun); } if (idle) @@ -7783,10 +7808,10 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) if (cfs_rq->last_h_load_update == now) return; - cfs_rq->h_load_next = NULL; + WRITE_ONCE(cfs_rq->h_load_next, NULL); for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - cfs_rq->h_load_next = se; + WRITE_ONCE(cfs_rq->h_load_next, se); if (cfs_rq->last_h_load_update == now) break; } @@ -7796,7 +7821,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) cfs_rq->last_h_load_update = now; } - while ((se = cfs_rq->h_load_next) != NULL) { + while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) { load = cfs_rq->h_load; load = div64_ul(load * se->avg.load_avg, cfs_rq_load_avg(cfs_rq) + 1); diff --git a/kernel/signal.c b/kernel/signal.c index fcd9a136cd17..425489c951b5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -696,6 +696,48 @@ static inline bool si_fromuser(const struct siginfo *info) (!is_si_special(info) && SI_FROMUSER(info)); } +static int dequeue_synchronous_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * called with RCU read lock from check_kill_permission() */ @@ -2198,6 +2240,14 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + if (signal_group_exit(signal)) { + ksig->info.si_signo = signr = SIGKILL; + sigdelset(¤t->pending.signal, SIGKILL); + recalc_sigpending(); + goto fatal; + } + for (;;) { struct k_sigaction *ka; @@ -2211,7 +2261,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ @@ -2293,6 +2351,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bf19e8cbcb51..28df0358ba94 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,7 +126,9 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; +static unsigned long long_max = LONG_MAX; static int one_hundred = 100; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; @@ -1693,6 +1695,8 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero_ul, + .extra2 = &long_max, }, { .procname = "nr_open", @@ -2396,7 +2400,16 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, { struct do_proc_dointvec_minmax_conv_param *param = data; if (write) { - int val = *negp ? -*lvalp : *lvalp; + int val; + if (*negp) { + if (*lvalp > (unsigned long) INT_MAX + 1) + return -EINVAL; + val = -*lvalp; + } else { + if (*lvalp > (unsigned long) INT_MAX) + return -EINVAL; + val = *lvalp; + } if ((param->min && *param->min > val) || (param->max && *param->max < val)) return -EINVAL; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80016b329d94..8fc68e60c795 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -103,7 +103,7 @@ static void bump_cpu_timer(struct k_itimer *timer, continue; timer->it.cpu.expires += incr; - timer->it_overrun += 1 << i; + timer->it_overrun += 1LL << i; delta -= incr; } } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fc7c37ad90a0..0e6ed2e7d066 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -355,6 +355,17 @@ static __init int init_posix_timers(void) __initcall(init_posix_timers); +/* + * The siginfo si_overrun field and the return value of timer_getoverrun(2) + * are of type int. Clamp the overrun value to INT_MAX + */ +static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) +{ + s64 sum = timr->it_overrun_last + (s64)baseval; + + return sum > (s64)INT_MAX ? INT_MAX : (int)sum; +} + static void schedule_next_timer(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; @@ -362,12 +373,11 @@ static void schedule_next_timer(struct k_itimer *timr) if (timr->it.real.interval.tv64 == 0) return; - timr->it_overrun += (unsigned int) hrtimer_forward(timer, - timer->base->get_time(), - timr->it.real.interval); + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; + timr->it_overrun = -1LL; ++timr->it_requeue_pending; hrtimer_restart(timer); } @@ -396,7 +406,7 @@ void do_schedule_next_timer(struct siginfo *info) else schedule_next_timer(timr); - info->si_overrun += timr->it_overrun_last; + info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } if (timr) @@ -491,8 +501,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) now = ktime_add(now, kj); } #endif - timr->it_overrun += (unsigned int) - hrtimer_forward(timer, now, + timr->it_overrun += hrtimer_forward(timer, now, timr->it.real.interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; @@ -633,7 +642,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; - new_timer->it_overrun = -1; + new_timer->it_overrun = -1LL; if (timer_event_spec) { if (copy_from_user(&event, timer_event_spec, sizeof (event))) { @@ -762,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || timr->it_sigev_notify == SIGEV_NONE)) - timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); + timr->it_overrun += hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); /* Return 0 only, when the timer is expired and not pending */ @@ -824,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) if (!timr) return -EINVAL; - overrun = timr->it_overrun_last; + overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7902ecbce8ec..7d9067f1c056 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -39,7 +39,9 @@ static struct { seqcount_t seq; struct timekeeper timekeeper; -} tk_core ____cacheline_aligned; +} tk_core ____cacheline_aligned = { + .seq = SEQCNT_ZERO(tk_core.seq), +}; static DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ef4f16e81283..1407ed20ea93 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -399,7 +399,7 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0444, NULL, &timer_list_fops); + pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); if (!pe) return -ENOMEM; return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd3682c3..3dd40c736067 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -119,11 +119,13 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) i++; } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; - i++; - if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) return -EINVAL; fmt_cnt++; - if (fmt[i - 1] == 's') { + if (fmt[i] == 's') { if (str_seen) /* allow only one '%s' per fmt string */ return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ac758a53fcea..6380ec0453e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -4767,6 +4768,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_ENABLED) ftrace_shutdown(ops, 0); ops->flags |= FTRACE_OPS_FL_DELETED; + ftrace_free_filter(ops); mutex_unlock(&ftrace_lock); } @@ -5164,7 +5166,7 @@ static struct ftrace_ops control_ops = { INIT_OPS_HASH(control_ops) }; -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { @@ -5213,11 +5215,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif /* @@ -5238,6 +5242,7 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, trace_clear_recursion(bit); } +NOKPROBE_SYMBOL(ftrace_ops_recurs_func); /** * ftrace_ops_get_func - get the function a trampoline should call diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 74b20e3ab8c6..5e091614fe39 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4042,6 +4042,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over + * @flags: gfp flags to use for memory allocation * * This performs the initial preparations necessary to iterate * through the buffer. Memory is allocated, buffer recording @@ -4059,7 +4060,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * -ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; @@ -4067,7 +4068,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; - iter = kmalloc(sizeof(*iter), GFP_KERNEL); + iter = kmalloc(sizeof(*iter), flags); if (!iter) return NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bb99b96150..5814e7386109 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3194,7 +3194,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); + ring_buffer_read_prepare(iter->trace_buffer->buffer, + cpu, GFP_KERNEL); } ring_buffer_read_prepare_sync(); for_each_tracing_cpu(cpu) { @@ -3204,7 +3205,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu); + ring_buffer_read_prepare(iter->trace_buffer->buffer, + cpu, GFP_KERNEL); ring_buffer_read_prepare_sync(); ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); @@ -4798,7 +4800,6 @@ out: return ret; fail: - kfree(iter->trace); kfree(iter); __trace_array_put(tr); mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index b8a894adab2c..8be66a2b0cac 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -727,8 +727,10 @@ static int set_trigger_filter(char *filter_str, /* The filter is for the 'trigger' event, not the triggered event */ ret = create_event_filter(file->event_call, filter_str, false, &filter); - if (ret) - goto out; + /* + * If create_event_filter() fails, filter still needs to be freed. + * Which the calling code will do with data->filter. + */ assign: tmp = rcu_access_pointer(data->filter); diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 57149bce6aad..896458285fdd 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -50,14 +50,16 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) if (cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter.buffer_iter[cpu] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu); + ring_buffer_read_prepare(iter.trace_buffer->buffer, + cpu, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu]); tracing_iter_reset(&iter, cpu); } } else { iter.cpu_file = cpu_file; iter.buffer_iter[cpu_file] = - ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file); + ring_buffer_read_prepare(iter.trace_buffer->buffer, + cpu_file, GFP_ATOMIC); ring_buffer_read_start(iter.buffer_iter[cpu_file]); tracing_iter_reset(&iter, cpu_file); } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1dc887bab085..518e62a398d2 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -150,7 +150,14 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, ret = strncpy_from_user(dst, src, maxlen); if (ret == maxlen) - dst[--ret] = '\0'; + dst[ret - 1] = '\0'; + else if (ret >= 0) + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; if (ret < 0) { /* Failed to fetch string */ ((u8 *)get_rloc_data(dest))[0] = '\0'; diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 5cd093589c5a..3b46c5433b7a 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -781,9 +781,11 @@ all_leaves_cluster_together: new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); - blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); - pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); - new_s0->index_key[keylen - 1] &= ~blank; + if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. diff --git a/lib/bsearch.c b/lib/bsearch.c index e33c179089db..d50048446b77 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -11,6 +11,7 @@ #include #include +#include /* * bsearch - binary search an array of elements @@ -51,3 +52,4 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size, return NULL; } EXPORT_SYMBOL(bsearch); +NOKPROBE_SYMBOL(bsearch); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a26328ec39f1..bb37541cd441 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1088,7 +1088,8 @@ void __init debug_objects_mem_init(void) obj_cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS, NULL); + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, + NULL); if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; diff --git a/lib/div64.c b/lib/div64.c index 62a698a432bc..75b8521c2146 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -100,7 +100,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -138,7 +138,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 1ef4cc344977..6d35274170bc 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -7,6 +7,7 @@ #include #include +#include /** * int_sqrt - rough approximation to sqrt @@ -21,7 +22,7 @@ unsigned long int_sqrt(unsigned long x) if (x <= 1) return x; - m = 1UL << (BITS_PER_LONG - 2); + m = 1UL << (__fls(x) & ~1UL); while (m != 0) { b = y + m; y >>= 1; diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 245900b98c8e..222c8010bda0 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -1,27 +1,38 @@ #include +#include #include #include +#include #include -#define NODES 100 -#define PERF_LOOPS 100000 -#define SEARCHES 100 -#define SEARCH_LOOPS 10000 +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +__param(int, nnodes, 100, "Number of nodes in the interval tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the tree"); + +__param(int, nsearches, 100, "Number of searches to the interval tree"); +__param(int, search_loops, 1000, "Number of iterations searching the tree"); +__param(bool, search_all, false, "Searches will iterate all nodes in the tree"); + +__param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); static struct rb_root root = RB_ROOT; -static struct interval_tree_node nodes[NODES]; -static u32 queries[SEARCHES]; +static struct interval_tree_node *nodes = NULL; +static u32 *queries = NULL; static struct rnd_state rnd; static inline unsigned long -search(unsigned long query, struct rb_root *root) +search(struct rb_root *root, unsigned long start, unsigned long last) { struct interval_tree_node *node; unsigned long results = 0; - for (node = interval_tree_iter_first(root, query, query); node; - node = interval_tree_iter_next(node, query, query)) + for (node = interval_tree_iter_first(root, start, last); node; + node = interval_tree_iter_next(node, start, last)) results++; return results; } @@ -29,19 +40,22 @@ search(unsigned long query, struct rb_root *root) static void init(void) { int i; - for (i = 0; i < NODES; i++) { - u32 a = prandom_u32_state(&rnd); - u32 b = prandom_u32_state(&rnd); - if (a <= b) { - nodes[i].start = a; - nodes[i].last = b; - } else { - nodes[i].start = b; - nodes[i].last = a; - } + + for (i = 0; i < nnodes; i++) { + u32 b = (prandom_u32_state(&rnd) >> 4) % max_endpoint; + u32 a = (prandom_u32_state(&rnd) >> 4) % b; + + nodes[i].start = a; + nodes[i].last = b; } - for (i = 0; i < SEARCHES; i++) - queries[i] = prandom_u32_state(&rnd); + + /* + * Limit the search scope to what the user defined. + * Otherwise we are merely measuring empty walks, + * which is pointless. + */ + for (i = 0; i < nsearches; i++) + queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint; } static int interval_tree_test_init(void) @@ -50,6 +64,16 @@ static int interval_tree_test_init(void) unsigned long results; cycles_t time1, time2, time; + nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL); + if (!nodes) + return -ENOMEM; + + queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL); + if (!queries) { + kfree(nodes); + return -ENOMEM; + } + printk(KERN_ALERT "interval tree insert/remove"); prandom_seed_state(&rnd, 3141592653589793238ULL); @@ -57,39 +81,46 @@ static int interval_tree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) interval_tree_insert(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) interval_tree_remove(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); + time = div_u64(time, perf_loops); printk(" -> %llu cycles\n", (unsigned long long)time); printk(KERN_ALERT "interval tree search"); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) interval_tree_insert(nodes + j, &root); time1 = get_cycles(); results = 0; - for (i = 0; i < SEARCH_LOOPS; i++) - for (j = 0; j < SEARCHES; j++) - results += search(queries[j], &root); + for (i = 0; i < search_loops; i++) + for (j = 0; j < nsearches; j++) { + unsigned long start = search_all ? 0 : queries[j]; + unsigned long last = search_all ? max_endpoint : queries[j]; + + results += search(&root, start, last); + } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, SEARCH_LOOPS); - results = div_u64(results, SEARCH_LOOPS); + time = div_u64(time, search_loops); + results = div_u64(results, search_loops); printk(" -> %llu cycles (%lu results)\n", (unsigned long long)time, results); + kfree(queries); + kfree(nodes); + return -EAGAIN; /* Fail will directly unload the module */ } diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 3b10a48fa040..a84efd4aad37 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -23,7 +23,7 @@ endif ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -ffreestanding ifeq ($(ARCH),arm) -NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon endif ifeq ($(ARCH),arm64) CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 8b3c9dc88262..afedd3770562 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -1,11 +1,18 @@ #include +#include #include #include +#include #include -#define NODES 100 -#define PERF_LOOPS 100000 -#define CHECK_LOOPS 100 +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +__param(int, nnodes, 100, "Number of nodes in the rb-tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); +__param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { u32 key; @@ -17,7 +24,7 @@ struct test_node { }; static struct rb_root root = RB_ROOT; -static struct test_node nodes[NODES]; +static struct test_node *nodes = NULL; static struct rnd_state rnd; @@ -95,7 +102,7 @@ static void erase_augmented(struct test_node *node, struct rb_root *root) static void init(void) { int i; - for (i = 0; i < NODES; i++) { + for (i = 0; i < nnodes; i++) { nodes[i].key = prandom_u32_state(&rnd); nodes[i].val = prandom_u32_state(&rnd); } @@ -177,6 +184,10 @@ static int __init rbtree_test_init(void) int i, j; cycles_t time1, time2, time; + nodes = kmalloc(nnodes * sizeof(*nodes), GFP_KERNEL); + if (!nodes) + return -ENOMEM; + printk(KERN_ALERT "rbtree testing"); prandom_seed_state(&rnd, 3141592653589793238ULL); @@ -184,27 +195,27 @@ static int __init rbtree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) insert(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) erase(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); + time = div_u64(time, perf_loops); printk(" -> %llu cycles\n", (unsigned long long)time); - for (i = 0; i < CHECK_LOOPS; i++) { + for (i = 0; i < check_loops; i++) { init(); - for (j = 0; j < NODES; j++) { + for (j = 0; j < nnodes; j++) { check(j); insert(nodes + j, &root); } - for (j = 0; j < NODES; j++) { - check(NODES - j); + for (j = 0; j < nnodes; j++) { + check(nnodes - j); erase(nodes + j, &root); } check(0); @@ -216,32 +227,34 @@ static int __init rbtree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) insert_augmented(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) erase_augmented(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); + time = div_u64(time, perf_loops); printk(" -> %llu cycles\n", (unsigned long long)time); - for (i = 0; i < CHECK_LOOPS; i++) { + for (i = 0; i < check_loops; i++) { init(); - for (j = 0; j < NODES; j++) { + for (j = 0; j < nnodes; j++) { check_augmented(j); insert_augmented(nodes + j, &root); } - for (j = 0; j < NODES; j++) { - check_augmented(NODES - j); + for (j = 0; j < nnodes; j++) { + check_augmented(nnodes - j); erase_augmented(nodes + j, &root); } check_augmented(0); } + kfree(nodes); + return -EAGAIN; /* Fail will directly unload the module */ } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 37ea94b636a3..7bb8649429bf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -250,8 +250,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht) if (!new_tbl) return 0; - for (old_hash = 0; old_hash < old_tbl->size; old_hash++) + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { rhashtable_rehash_chain(ht, old_hash); + cond_resched(); + } /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); @@ -441,7 +443,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *tbl) + struct bucket_table *tbl, + void **data) { struct rhash_head *head; unsigned int hash; @@ -452,8 +455,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); err = -EEXIST; - if (key && rhashtable_lookup_fast(ht, key, ht->p)) - goto exit; + if (key) { + *data = rhashtable_lookup_fast(ht, key, ht->p); + if (*data) + goto exit; + } err = -E2BIG; if (unlikely(rht_grow_above_max(ht, tbl))) @@ -838,6 +844,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; + cond_resched(); for (pos = rht_dereference(tbl->buckets[i], ht), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 5c94e1012a91..cbef5ee4c459 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -143,9 +143,13 @@ int seq_buf_puts(struct seq_buf *s, const char *str) WARN_ON(s->size == 0); + /* Add 1 to len for the trailing null byte which must be there */ + len += 1; + if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, str, len); - s->len += len; + /* Don't count the trailing null byte against the capacity */ + s->len += len - 1; return 0; } seq_buf_set_overflow(s); diff --git a/lib/string.c b/lib/string.c index ed83562a53ae..1cd9757291b1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -772,6 +772,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 771234d050c7..6bc452b33b76 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -17,6 +17,8 @@ * 08/12/11 beckyb Add highmem support */ +#define pr_fmt(fmt) "software IO TLB: " fmt + #include #include #include @@ -143,20 +145,16 @@ static bool no_iotlb_memory; void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); + pr_warn("No low mem\n"); return; } - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", (unsigned long long)io_tlb_start, (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); + bytes >> 20); } int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) @@ -230,7 +228,7 @@ swiotlb_init(int verbose) if (io_tlb_start) memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); + pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -272,8 +270,8 @@ swiotlb_late_init_with_default_size(size_t default_size) return -ENOMEM; } if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + pr_warn("only able to allocate %ld MB\n", + (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; } rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); @@ -680,7 +678,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + pr_warn("coherent allocation failed for device %s size=%zu\n", dev_name(hwdev), size); dump_stack(); diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c index 5241df36eedf..dadcabe50988 100644 --- a/lib/test-hexdump.c +++ b/lib/test-hexdump.c @@ -81,7 +81,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, const char *q = *result++; size_t amount = strlen(q); - strncpy(p, q, amount); + memcpy(p, q, amount); p += amount + 1; } if (i) diff --git a/mm/cma.c b/mm/cma.c index 43f4a122e969..f0d91aca5a4c 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -339,12 +339,14 @@ int __init cma_declare_contiguous(phys_addr_t base, ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); if (ret) - goto err; + goto free_mem; pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, &base); return 0; +free_mem: + memblock_free(base, size); err: pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); return ret; diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 7cf2b7163222..c1e7926a41c4 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -11,10 +11,7 @@ * get_vaddr_frames() - map virtual addresses to pfns * @start: starting user address * @nr_frames: number of pages / pfns from start to map - * @write: whether pages will be written to by the caller - * @force: whether to force write access even if user mapping is - * readonly. See description of the same argument of - get_user_pages(). + * @gup_flags: flags modifying lookup behaviour * @vec: structure which receives pages / pfns of the addresses mapped. * It should have space for at least nr_frames entries. * @@ -34,7 +31,7 @@ * This function takes care of grabbing mmap_sem as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, - bool write, bool force, struct frame_vector *vec) + unsigned int gup_flags, struct frame_vector *vec) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->got_ref = true; vec->is_pfns = false; ret = get_user_pages_locked(current, mm, start, nr_frames, - write, force, (struct page **)(vec->ptrs), &locked); + gup_flags, (struct page **)(vec->ptrs), &locked); goto out; } diff --git a/mm/gup.c b/mm/gup.c index 018144c4b9ec..2cd3b31e3666 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -368,6 +368,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; + if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) + return -EFAULT; + if (gup_flags & FOLL_WRITE) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) @@ -627,7 +630,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, struct vm_area_struct **vmas, int *locked, bool notify_drop, @@ -645,10 +647,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, if (pages) flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; pages_done = 0; lock_dropped = false; @@ -742,11 +740,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, */ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, locked, true, FOLL_TOUCH); + return __get_user_pages_locked(tsk, mm, start, nr_pages, + pages, NULL, locked, true, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); @@ -762,14 +761,14 @@ EXPORT_SYMBOL(get_user_pages_locked); */ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; int locked = 1; + down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, &locked, false, gup_flags); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, + &locked, false, gup_flags); if (locked) up_read(&mm->mmap_sem); return ret; @@ -795,10 +794,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); */ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, FOLL_TOUCH); + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, + pages, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -858,11 +857,13 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, int write, - int force, struct page **pages, struct vm_area_struct **vmas) + unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, FOLL_TOUCH); + return __get_user_pages_locked(tsk, mm, start, nr_pages, + pages, vmas, NULL, false, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1411,7 +1412,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6f99a0f906bb..324b2953e57e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3472,7 +3472,6 @@ retry_avoidcopy: copy_user_huge_page(new_page, old_page, address, vma, pages_per_huge_page(h)); __SetPageUptodate(new_page); - set_page_huge_active(new_page); mmun_start = address & huge_page_mask(h); mmun_end = mmun_start + huge_page_size(h); @@ -3494,6 +3493,7 @@ retry_avoidcopy: make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page); hugepage_add_new_anon_rmap(new_page, vma, address); + set_page_huge_active(new_page); /* Make the old page be freed below */ new_page = old_page; } @@ -3575,6 +3575,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; pte_t new_pte; spinlock_t *ptl; + bool new_page = false; /* * Currently, we are forced to kill the process in the event the @@ -3608,7 +3609,7 @@ retry: } clear_huge_page(page, address, pages_per_huge_page(h)); __SetPageUptodate(page); - set_page_huge_active(page); + new_page = true; if (vma->vm_flags & VM_MAYSHARE) { int err = huge_add_to_page_cache(page, mapping, idx); @@ -3680,6 +3681,15 @@ retry: } spin_unlock(ptl); + + /* + * Only make newly allocated pages active. Existing pages found + * in the pagecache could be !page_huge_active() if they have been + * isolated for migration. + */ + if (new_page) + set_page_huge_active(page); + unlock_page(page); out: return ret; @@ -4053,6 +4063,14 @@ int hugetlb_reserve_pages(struct inode *inode, struct resv_map *resv_map; long gbl_reserve; + /* This should never happen */ + if (from > to) { +#ifdef CONFIG_DEBUG_VM + WARN(1, "%s called with a negative range\n", __func__); +#endif + return -EINVAL; + } + /* * Only apply hugepage reservation if asked. At fault time, an * attempt will be made for VM_NORESERVE to allocate a page @@ -4142,7 +4160,9 @@ int hugetlb_reserve_pages(struct inode *inode, return 0; out_err: if (!vma || vma->vm_flags & VM_MAYSHARE) - region_abort(resv_map, from, to); + /* Don't call region_abort if region_chg failed */ + if (chg >= 0) + region_abort(resv_map, from, to); if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_put(&resv_map->refs, resv_map_release); return ret; diff --git a/mm/memory.c b/mm/memory.c index 5aee9ec8b8c6..fa752df6dc85 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3711,10 +3711,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * given task for page fault accounting. */ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3724,7 +3725,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); + gup_flags, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -3776,14 +3777,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* @@ -3796,12 +3797,17 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, { struct mm_struct *mm; int ret; + unsigned int flags = FOLL_FORCE; mm = get_task_mm(tsk); if (!mm) return 0; - ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + if (write) + flags |= FOLL_WRITE; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, flags); + mmput(mm); return ret; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4d91216dcab2..7598bf2bb493 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -1357,7 +1358,8 @@ static struct page *next_active_pageblock(struct page *page) int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn); /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { @@ -1397,6 +1399,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; @@ -1471,6 +1476,21 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) continue; } + /* + * HWPoison pages have elevated reference counts so the migration would + * fail on them. It also doesn't make any sense to migrate them in the + * first place. Still try to unmap such a page in case it is still mapped + * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep + * the unmap as the catch all safety net). + */ + if (PageHWPoison(page)) { + if (WARN_ON(PageLRU(page))) + isolate_lru_page(page); + if (page_mapped(page)) + try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS); + continue; + } + if (!get_page_unless_zero(page)) continue; /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e48fa18e1828..77167933686a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -514,12 +514,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) continue; - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(vma)) + break; migrate_page_add(page, qp->pagelist, flags); + } else + break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); - return 0; + return addr != end ? -EIO : 0; } static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, @@ -819,7 +823,7 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr) struct page *p; int err; - err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL); + err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, &p, NULL); if (err >= 0) { err = page_to_nid(p); put_page(p); @@ -1296,7 +1300,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; - const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long); + unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); if (copy > nbytes) { if (copy > PAGE_SIZE) @@ -1457,7 +1461,7 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, int uninitialized_var(pval); nodemask_t nodes; - if (nmask != NULL && maxnode < MAX_NUMNODES) + if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; err = do_get_mempolicy(&pval, &nodes, addr, flags); @@ -1486,7 +1490,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); - nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); + nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) diff --git a/mm/migrate.c b/mm/migrate.c index afedcfab60e2..73da75d5e5b2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -936,6 +936,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, int rc = MIGRATEPAGE_SUCCESS; int *result = NULL; struct page *newpage; + bool is_lru = !isolated_balloon_page(page); newpage = get_new_page(page, private, &result); if (!newpage) @@ -983,11 +984,13 @@ out: /* * If migration was not successful and there's a freeing callback, use * it. Otherwise, putback_lru_page() will drop the reference grabbed - * during isolation. + * during isolation. Use the old state of the isolated source page to + * determine if we migrated a LRU page. newpage was already unlocked + * and possibly modified by its owner - don't rely on the page state. */ if (put_new_page) put_new_page(newpage, private); - else if (unlikely(__is_movable_balloon_page(newpage))) { + else if (rc == MIGRATEPAGE_SUCCESS && unlikely(!is_lru)) { /* drop our reference, page already in the balloon */ put_page(newpage); } else @@ -1053,6 +1056,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, lock_page(hpage); } + /* + * Check for pages which are in the process of being freed. Without + * page_mapping() set, hugetlbfs specific move page routine will not + * be called and we could leak usage counts for subpools. + */ + if (page_private(hpage) && !page_mapping(hpage)) { + rc = -EBUSY; + goto out_unlock; + } + if (PageAnon(hpage)) anon_vma = page_get_anon_vma(hpage); @@ -1083,6 +1096,7 @@ put_anon: put_new_page = NULL; } +out_unlock: unlock_page(hpage); out: if (rc != -EAGAIN) diff --git a/mm/mmap.c b/mm/mmap.c index f325aa33f327..f412ca2ba6a0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2315,12 +2315,11 @@ int expand_downwards(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *prev; unsigned long gap_addr; - int error; + int error = 0; address &= PAGE_MASK; - error = security_mmap_addr(address); - if (error) - return error; + if (address < mmap_min_addr) + return -EPERM; /* Enforce stack_guard_gap */ gap_addr = address - stack_guard_gap; diff --git a/mm/nommu.c b/mm/nommu.c index 92be862c859b..2360546db065 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -184,40 +184,32 @@ finish_or_fault: */ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - int flags = 0; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - - return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas, - NULL); + return __get_user_pages(tsk, mm, start, nr_pages, + gup_flags, pages, vmas, NULL); } EXPORT_SYMBOL(get_user_pages); long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - return get_user_pages(tsk, mm, start, nr_pages, write, force, + return get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; down_read(&mm->mmap_sem); - ret = get_user_pages(tsk, mm, start, nr_pages, write, force, - pages, NULL); + ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, + NULL, NULL); up_read(&mm->mmap_sem); return ret; } @@ -225,10 +217,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, 0); + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, + pages, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -1937,9 +1929,10 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) EXPORT_SYMBOL(filemap_map_pages); static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); @@ -1974,14 +1967,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* @@ -1999,7 +1992,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, write); + len = __access_remote_vm(tsk, mm, addr, buf, len, + write ? FOLL_WRITE : 0); mmput(mm); return len; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6c1b9c43cc23..7e5d3467d6d2 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -543,6 +543,13 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, * still freeing memory. */ read_lock(&tasklist_lock); + + /* + * The task 'p' might have already exited before reaching here. The + * put_task_struct() will free task_struct 'p' while the loop still try + * to access the field of 'p', so, get an extra reference. + */ + get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -562,6 +569,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, } } } + put_task_struct(p); read_unlock(&tasklist_lock); p = find_lock_task_mm(victim); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f6cf7379c3f9..cfd1109d9cd4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2161,6 +2161,7 @@ int write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0; + int error; struct pagevec pvec; int nr_pages; pgoff_t uninitialized_var(writeback_index); @@ -2241,25 +2242,31 @@ continue_unlock: goto continue_unlock; trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); - ret = (*writepage)(page, wbc, data); - if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { + error = (*writepage)(page, wbc, data); + if (unlikely(error)) { + /* + * Handle errors according to the type of + * writeback. There's no need to continue for + * background writeback. Just push done_index + * past this page so media errors won't choke + * writeout for the entire file. For integrity + * writeback, we must process the entire dirty + * set regardless of errors because the fs may + * still have state to clear for each page. In + * that case we continue processing and return + * the first error. + */ + if (error == AOP_WRITEPAGE_ACTIVATE) { unlock_page(page); - ret = 0; - } else { - /* - * done_index is set past this page, - * so media errors will not choke - * background writeout for the entire - * file. This has consequences for - * range_cyclic semantics (ie. it may - * not be suitable for data integrity - * writeout). - */ + error = 0; + } else if (wbc->sync_mode != WB_SYNC_ALL) { + ret = error; done_index = page->index + 1; done = 1; break; } + if (!ret) + ret = error; } /* diff --git a/mm/page_ext.c b/mm/page_ext.c index 4d1eac0d4fc5..de1f34c5a2f1 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -255,6 +255,7 @@ static void free_page_ext(void *addr) table_size = sizeof(struct page_ext) * PAGES_PER_SECTION; BUG_ON(PageReserved(page)); + kmemleak_free(addr); free_pages_exact(addr, table_size); } } diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 5d453e58ddbf..1b5a6104c5fc 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -88,19 +88,23 @@ static int process_vm_rw_single_vec(unsigned long addr, ssize_t rc = 0; unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES / sizeof(struct pages *); + unsigned int flags = 0; /* Work out address and page range required */ if (len == 0) return 0; nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + if (vm_write) + flags |= FOLL_WRITE; + while (!rc && nr_pages && iov_iter_count(iter)) { int pages = min(nr_pages, max_pages_per_loop); size_t bytes; /* Get the pages we're interested in */ pages = get_user_pages_unlocked(task, mm, pa, pages, - vm_write, 0, process_pages); + process_pages, flags); if (pages <= 0) return -EFAULT; diff --git a/mm/readahead.c b/mm/readahead.c index ba22d7fe0afb..f230b942cda2 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -208,12 +208,21 @@ out: * memory at once. */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read) + pgoff_t offset, unsigned long nr_to_read) { + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); + struct file_ra_state *ra = &filp->f_ra; + unsigned long max_pages; + if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) return -EINVAL; - nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages); + /* + * If the request exceeds the readahead window, allow the read to + * be up to the optimal hardware IO size + */ + max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); + nr_to_read = min(nr_to_read, max_pages); while (nr_to_read) { int err; @@ -370,9 +379,18 @@ ondemand_readahead(struct address_space *mapping, bool hit_readahead_marker, pgoff_t offset, unsigned long req_size) { - unsigned long max = ra->ra_pages; + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); + unsigned long max_pages = ra->ra_pages; + unsigned long add_pages; pgoff_t prev_offset; + /* + * If the request exceeds the readahead window, allow the read to + * be up to the optimal hardware IO size + */ + if (req_size > max_pages && bdi->io_pages > max_pages) + max_pages = min(req_size, bdi->io_pages); + /* * start of file */ @@ -386,7 +404,7 @@ ondemand_readahead(struct address_space *mapping, if ((offset == (ra->start + ra->size - ra->async_size) || offset == (ra->start + ra->size))) { ra->start += ra->size; - ra->size = get_next_ra_size(ra, max); + ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; goto readit; } @@ -401,16 +419,16 @@ ondemand_readahead(struct address_space *mapping, pgoff_t start; rcu_read_lock(); - start = page_cache_next_hole(mapping, offset + 1, max); + start = page_cache_next_hole(mapping, offset + 1, max_pages); rcu_read_unlock(); - if (!start || start - offset > max) + if (!start || start - offset > max_pages) return 0; ra->start = start; ra->size = start - offset; /* old async_size */ ra->size += req_size; - ra->size = get_next_ra_size(ra, max); + ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; goto readit; } @@ -418,7 +436,7 @@ ondemand_readahead(struct address_space *mapping, /* * oversize read */ - if (req_size > max) + if (req_size > max_pages) goto initial_readahead; /* @@ -434,7 +452,7 @@ ondemand_readahead(struct address_space *mapping, * Query the page cache and look for the traces(cached history pages) * that a sequential stream would leave behind. */ - if (try_context_readahead(mapping, ra, offset, req_size, max)) + if (try_context_readahead(mapping, ra, offset, req_size, max_pages)) goto readit; /* @@ -445,7 +463,7 @@ ondemand_readahead(struct address_space *mapping, initial_readahead: ra->start = offset; - ra->size = get_init_ra_size(req_size, max); + ra->size = get_init_ra_size(req_size, max_pages); ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; readit: @@ -453,10 +471,17 @@ readit: * Will this read hit the readahead marker made by itself? * If so, trigger the readahead marker hit now, and merge * the resulted next readahead window into the current one. + * Take care of maximum IO pages as above. */ if (offset == ra->start && ra->size == ra->async_size) { - ra->async_size = get_next_ra_size(ra, max); - ra->size += ra->async_size; + add_pages = get_next_ra_size(ra, max_pages); + if (ra->size + add_pages <= max_pages) { + ra->async_size = add_pages; + ra->size += add_pages; + } else { + ra->size = max_pages; + ra->async_size = max_pages >> 1; + } } return ra_submit(ra, mapping, filp); diff --git a/mm/rmap.c b/mm/rmap.c index 488dda209431..cf733fab230f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -408,7 +408,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; - BUG_ON(anon_vma->degree); + VM_WARN_ON(anon_vma->degree); put_anon_vma(anon_vma); list_del(&avc->same_vma); diff --git a/mm/shmem.c b/mm/shmem.c index a06b31b8ff8d..3a88c6622432 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2293,16 +2293,20 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); - int ret; + int ret = 0; /* * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. + * But if an O_TMPFILE file is linked into the tmpfs, the + * first link must skip that, to get the accounting right. */ - ret = shmem_reserve_inode(inode->i_sb); - if (ret) - goto out; + if (inode->i_nlink) { + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; + } dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; diff --git a/mm/slab.c b/mm/slab.c index 80ca19a122f3..e0b0cf86b5b8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -644,14 +644,6 @@ static void start_cpu_timer(int cpu) static void init_arraycache(struct array_cache *ac, int limit, int batch) { - /* - * The array_cache structures contain pointers to free object. - * However, when such objects are allocated or transferred to another - * cache the pointers are not cleared and they could be counted as - * valid references during a kmemleak scan. Therefore, kmemleak must - * not scan such objects. - */ - kmemleak_no_scan(ac); if (ac) { ac->avail = 0; ac->limit = limit; @@ -667,6 +659,14 @@ static struct array_cache *alloc_arraycache(int node, int entries, struct array_cache *ac = NULL; ac = kmalloc_node(memsize, gfp, node); + /* + * The array_cache structures contain pointers to free object. + * However, when such objects are allocated or transferred to another + * cache the pointers are not cleared and they could be counted as + * valid references during a kmemleak scan. Therefore, kmemleak must + * not scan such objects. + */ + kmemleak_no_scan(ac); init_arraycache(ac, entries, batchcount); return ac; } @@ -859,8 +859,11 @@ static struct alien_cache *__alloc_alien_cache(int node, int entries, struct alien_cache *alc = NULL; alc = kmalloc_node(memsize, gfp, node); - init_arraycache(&alc->ac, entries, batch); - spin_lock_init(&alc->lock); + if (alc) { + kmemleak_no_scan(alc); + init_arraycache(&alc->ac, entries, batch); + spin_lock_init(&alc->lock); + } return alc; } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 77fee9325a57..497248b93a4c 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -182,13 +182,9 @@ retry: goto out_unlock; /* - * Be strict and only allow __mcopy_atomic on userfaultfd - * registered ranges to prevent userland errors going - * unnoticed. As far as the VM consistency is concerned, it - * would be perfectly safe to remove this check, but there's - * no useful usage for __mcopy_atomic ouside of userfaultfd - * registered ranges. This is after all why these are ioctls - * belonging to the userfaultfd and not syscalls. + * Check the vma is registered in uffd, this is required to + * enforce the VM_MAYWRITE check done at uffd registration + * time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; diff --git a/mm/util.c b/mm/util.c index 6ca10015fd41..4d59b4ffe66b 100644 --- a/mm/util.c +++ b/mm/util.c @@ -278,7 +278,7 @@ int __weak get_user_pages_fast(unsigned long start, { struct mm_struct *mm = current->mm; return get_user_pages_unlocked(current, mm, start, nr_pages, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); } EXPORT_SYMBOL_GPL(get_user_pages_fast); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6e7d513467c6..c5bbe08d156a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -446,7 +446,11 @@ nocache: } found: - if (addr + size > vend) + /* + * Check also calculated address against the vstart, + * because it can be 0 because of big align request. + */ + if (addr + size > vend || addr < vstart) goto overflow; va->va_start = addr; @@ -2162,7 +2166,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (kaddr + size > area->addr + area->size) + if (kaddr + size > area->addr + get_vm_area_size(area)) return -EINVAL; do { diff --git a/mm/vmstat.c b/mm/vmstat.c index 6af9bbad94c7..dd0a13013cb4 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -857,13 +857,8 @@ const char * const vmstat_text[] = { #endif #endif /* CONFIG_MEMORY_BALLOON */ #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ diff --git a/net/9p/client.c b/net/9p/client.c index ed8738c4dc09..443db202db09 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -156,6 +156,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) ret = r; continue; } + if (option < 4096) { + p9_debug(P9_DEBUG_ERROR, + "msize should be at least 4k\n"); + ret = -EINVAL; + continue; + } clnt->msize = option; break; case Opt_trans: @@ -972,10 +978,18 @@ static int p9_client_version(struct p9_client *c) else if (!strncmp(version, "9P2000", 6)) c->proto_version = p9_proto_legacy; else { + p9_debug(P9_DEBUG_ERROR, + "server returned an unknown version: %s\n", version); err = -EREMOTEIO; goto error; } + if (msize < 4096) { + p9_debug(P9_DEBUG_ERROR, + "server returned a msize < 4096: %d\n", msize); + err = -EREMOTEIO; + goto error; + } if (msize < c->msize) c->msize = msize; @@ -1040,6 +1054,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (clnt->msize > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize; + if (clnt->msize < 4096) { + p9_debug(P9_DEBUG_ERROR, + "Please specify a msize of at least 4k\n"); + err = -EINVAL; + goto close_trans; + } + err = p9_client_version(clnt); if (err) goto close_trans; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 145f80518064..7f1b45c082c9 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -570,9 +570,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); + return ret; } - return ret; + return fake_pdu.offset; } EXPORT_SYMBOL(p9stat_read); diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index af46bc49e1e9..b5f84f428aa6 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -293,7 +293,7 @@ out_interface: goto out; } -void __exit atalk_proc_exit(void) +void atalk_proc_exit(void) { remove_proc_entry("interface", atalk_proc_dir); remove_proc_entry("route", atalk_proc_dir); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index d5871ac493eb..4246df3b7ae8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1912,12 +1912,16 @@ static const char atalk_err_snap[] __initconst = /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { - int rc = proto_register(&ddp_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&ddp_proto, 0); + if (rc) goto out; - (void)sock_register(&atalk_family_ops); + rc = sock_register(&atalk_family_ops); + if (rc) + goto out_proto; + ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) printk(atalk_err_snap); @@ -1925,12 +1929,33 @@ static int __init atalk_init(void) dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); - register_netdevice_notifier(&ddp_notifier); + rc = register_netdevice_notifier(&ddp_notifier); + if (rc) + goto out_sock; + aarp_proto_init(); - atalk_proc_init(); - atalk_register_sysctl(); + rc = atalk_proc_init(); + if (rc) + goto out_aarp; + + rc = atalk_register_sysctl(); + if (rc) + goto out_proc; out: return rc; +out_proc: + atalk_proc_exit(); +out_aarp: + aarp_cleanup_module(); + unregister_netdevice_notifier(&ddp_notifier); +out_sock: + dev_remove_pack(&ppptalk_packet_type); + dev_remove_pack(<alk_packet_type); + unregister_snap_client(ddp_dl); + sock_unregister(PF_APPLETALK); +out_proto: + proto_unregister(&ddp_proto); + goto out; } module_init(atalk_init); diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index ebb864361f7a..4e6042e0fcac 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -44,9 +44,12 @@ static struct ctl_table atalk_table[] = { static struct ctl_table_header *atalk_table_header; -void atalk_register_sysctl(void) +int __init atalk_register_sysctl(void) { atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table); + if (!atalk_table_header) + return -ENOMEM; + return 0; } void atalk_unregister_sysctl(void) diff --git a/net/atm/lec.c b/net/atm/lec.c index 10e4066991b8..e4afac94ff15 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -721,7 +721,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -739,6 +742,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2fdebabbfacd..2772f6a13fcb 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -654,15 +654,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(&init_net, devname); + rtnl_lock(); + dev = __dev_get_by_name(&init_net, devname); if (!dev) { + rtnl_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25->ax25_dev) { + rtnl_unlock(); + res = -ENODEV; + break; + } ax25_fillin_cb(ax25, ax25->ax25_dev); - dev_put(dev); + rtnl_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 3d106767b272..5faca5db6385 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; @@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 2fa3be965101..cd9a24e5b97a 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -114,6 +114,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); + ax25_route_lock_use(); route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -206,9 +207,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - if (route) - ax25_put_route(route); + ax25_route_lock_unuse(); return NETDEV_TX_OK; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d39097737e38..149f82bd83fd 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -40,7 +40,7 @@ #include static ax25_route *ax25_route_list; -static DEFINE_RWLOCK(ax25_route_lock); +DEFINE_RWLOCK(ax25_route_lock); void ax25_rt_device_down(struct net_device *dev) { @@ -349,6 +349,7 @@ const struct file_operations ax25_route_fops = { * Find AX.25 route * * Only routes with a reference count of zero can be destroyed. + * Must be called with ax25_route_lock read locked. */ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { @@ -356,7 +357,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_route *ax25_def_rt = NULL; ax25_route *ax25_rt; - read_lock(&ax25_route_lock); /* * Bind to the physical interface we heard them on, or the default * route if none is found; @@ -379,11 +379,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) if (ax25_spe_rt != NULL) ax25_rt = ax25_spe_rt; - if (ax25_rt != NULL) - ax25_hold_route(ax25_rt); - - read_unlock(&ax25_route_lock); - return ax25_rt; } @@ -414,9 +409,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) ax25_route *ax25_rt; int err = 0; - if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) + ax25_route_lock_use(); + ax25_rt = ax25_get_route(addr, NULL); + if (!ax25_rt) { + ax25_route_lock_unuse(); return -EHOSTUNREACH; - + } if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; @@ -451,8 +449,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } put: - ax25_put_route(ax25_rt); - + ax25_route_lock_unuse(); return err; } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e163d7..3c8d8142e8c6 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,7 +18,6 @@ #include "hard-interface.h" #include "main.h" -#include #include #include #include @@ -104,8 +103,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) /* recurse over the parent device */ parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } ret = batadv_is_on_batman_iface(parent_dev); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 9f1fe6169bef..4812123e0a2c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -209,10 +209,14 @@ static int batadv_interface_tx(struct sk_buff *skb, soft_iface->trans_start = jiffies; vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); if (vhdr->h_vlan_encapsulated_proto != ethertype) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d40d32a2c12d..37fe2b158c2a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5185,6 +5185,12 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } + /* Check if request ended in Command Status - no way to retreive + * any extra parameters in this case. + */ + if (hdr->evt == HCI_EV_CMD_STATUS) + return false; + if (hdr->evt != HCI_EV_CMD_COMPLETE) { BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); return false; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index c842f40c1173..ea1cd8b21708 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -558,13 +558,12 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; - hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) atomic_dec(&monitor_promisc); bt_sock_unlink(&hci_sk_list, sk); + hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* When releasing an user channel exclusive access, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index af68674690af..f76e9c1e9f17 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3315,16 +3315,22 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); + if (len < 0) + break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; break; @@ -3332,26 +3338,30 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *) val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - remote_efs = 1; - memcpy(&efs, (void *) val, olen); - } + if (olen != sizeof(efs)) + break; + remote_efs = 1; + memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP)) return -ECONNREFUSED; - set_bit(FLAG_EXT_CTRL, &chan->flags); set_bit(CONF_EWS_RECV, &chan->conf_state); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; @@ -3361,7 +3371,6 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data default: if (hint) break; - result = L2CAP_CONF_UNKNOWN; *((u8 *) ptr++) = type; break; @@ -3526,58 +3535,65 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, + endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, + chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; - chan->fcs = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - memcpy(&efs, (void *)val, olen); - - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); - } + if (olen != sizeof(efs)) + break; + memcpy(&efs, (void *)val, olen); + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, @@ -3706,13 +3722,18 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; txwin_ext = val; break; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index fcdb86dd5a23..c21209aada8c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -39,10 +39,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); skb_sender_cpu_clear(skb); @@ -88,12 +88,11 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 270d9c9a5331..a52b4ffe30f4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1261,14 +1261,7 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - - /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, - * the arrival port for IGMP Queries where the source address - * is 0.0.0.0 should not be added to router port list. - */ - if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - saddr->proto == htons(ETH_P_IPV6)) - br_multicast_mark_router(br, port); + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, @@ -1901,7 +1894,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -1913,6 +1907,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 55dcb2b20b59..93b5525bcccf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -267,7 +267,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if (neigh->hh.hh_len) { + if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); @@ -873,11 +873,6 @@ static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; -void br_netfilter_enable(void) -{ -} -EXPORT_SYMBOL_GPL(br_netfilter_enable); - /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static struct nf_hook_ops br_nf_ops[] __read_mostly = { diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index d61f56efc8dc..69dfd212e50d 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8b8a43fda6ca..f13402d407e4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1528,6 +1528,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2368,6 +2370,8 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index fdba3d9fbff3..6e48aa69fa24 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -192,6 +192,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/can/bcm.c b/net/can/bcm.c index 4ccfd356baed..1f15622d3c65 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -67,6 +67,9 @@ */ #define MAX_NFRAMES 256 +/* limit timers to 400 days for sending/timeouts */ +#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) + /* use of last_frames[index].can_dlc */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ @@ -136,6 +139,22 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } +/* check limitations for timeval provided by user */ +static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) +{ + if ((msg_head->ival1.tv_sec < 0) || + (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival1.tv_usec < 0) || + (msg_head->ival1.tv_usec >= USEC_PER_SEC) || + (msg_head->ival2.tv_sec < 0) || + (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival2.tv_usec < 0) || + (msg_head->ival2.tv_usec >= USEC_PER_SEC)) + return true; + + return false; +} + #define CFSIZ sizeof(struct can_frame) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) @@ -855,6 +874,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex); @@ -1020,6 +1043,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex); if (op) { diff --git a/net/can/gw.c b/net/can/gw.c index 77c8af4047ef..81650affa3fa 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -418,13 +418,29 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); - /* check for checksum updates when the CAN frame has been modified */ + /* Has the CAN frame been modified? */ if (modidx) { - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + /* get available space for the processed CAN frame type */ + int max_len = nskb->len - offsetof(struct can_frame, data); + + /* dlc may have changed, make sure it fits to the CAN frame */ + if (cf->can_dlc > max_len) + goto out_delete; + + /* check for checksum updates in classic CAN length only */ + if (gwj->mod.csumfunc.crc8) { + if (cf->can_dlc > 8) + goto out_delete; + + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + } + + if (gwj->mod.csumfunc.xor) { + if (cf->can_dlc > 8) + goto out_delete; - if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } } /* clear the skb timestamp if not configured the other way */ @@ -436,6 +452,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; + + return; + + out_delete: + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } static inline int cgw_register_filter(struct cgw_job *gwj) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad3c9e96a275..3ed2796d008b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2049,15 +2049,19 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); if (con->auth_reply_buf) { + int len = le32_to_cpu(con->in_reply.authorizer_len); + /* * Any connection that defines ->get_authorizer() * should also define ->verify_authorizer_reply(). * See get_connect_authorizer(). */ - ret = con->ops->verify_authorizer_reply(con, 0); - if (ret < 0) { - con->error_msg = "bad authorize reply"; - return ret; + if (len) { + ret = con->ops->verify_authorizer_reply(con, 0); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } } } @@ -3181,9 +3185,10 @@ void ceph_con_keepalive(struct ceph_connection *con) dout("con_keepalive %p\n", con); mutex_lock(&con->mutex); clear_standby(con); + con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING); mutex_unlock(&con->mutex); - if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && - con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) + + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index d4f5f220a8e5..28453d698d86 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data, while (got < num_pages) { rc = get_user_pages_unlocked(current, current->mm, (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got); + num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); if (rc < 0) break; BUG_ON(rc == 0); diff --git a/net/compat.c b/net/compat.c index 17e97b106458..d67684010455 100644 --- a/net/compat.c +++ b/net/compat.c @@ -443,12 +443,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) err = -ENOENT; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); + if (tv.tv_sec == -1) return err; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } err = 0; if (put_user(tv.tv_sec, &ctv->tv_sec) || @@ -471,12 +473,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta err = -ENOENT; if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return err; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - ts = ktime_to_timespec(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + ts = ktime_to_timespec(kt); } err = 0; if (put_user(ts.tv_sec, &ctv->tv_sec) || diff --git a/net/core/dev.c b/net/core/dev.c index e03c1d2f6707..5e78923dfd63 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -183,7 +183,7 @@ EXPORT_SYMBOL(dev_base_lock); static DEFINE_SPINLOCK(napi_hash_lock); static unsigned int napi_gen_id = NR_CPUS; -static DEFINE_HASHTABLE(napi_hash, 8); +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -6421,7 +6421,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { @@ -6441,7 +6441,7 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b6bca625b0d2..9a53c66deb64 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1287,17 +1287,22 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; + if (gstrings.len) { + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); + if (!data) + return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) goto out; useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) + if (gstrings.len && + copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) goto out; ret = 0; @@ -1385,17 +1390,21 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = kmalloc(n_stats * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; + if (n_stats) { + data = kmalloc(n_stats * sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) goto out; ret = 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f88a62ab019d..579d351f6ddd 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1361,6 +1361,9 @@ static int register_queue_kobjects(struct net_device *dev) error: netdev_queue_update_kobjects(dev, txq, 0); net_rx_queue_update_kobjects(dev, rxq, 0); +#ifdef CONFIG_SYSFS + kset_unregister(dev->queues_kset); +#endif return error; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ccd20669ac00..087ce1598b74 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -280,6 +280,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->count, 1); atomic_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d2a46ffe6382..d52b633164c9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2931,6 +2931,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); if (err) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9703924ed071..2f63a90065e6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -374,6 +374,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) */ void *netdev_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(netdev_alloc_frag); @@ -387,6 +389,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) void *napi_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(napi_alloc_frag); @@ -1502,6 +1506,21 @@ done: } EXPORT_SYMBOL(___pskb_trim); +/* Note : use pskb_trim_rcsum() instead of calling this directly + */ +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + int delta = skb->len - len; + + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); + } + return __pskb_trim(skb, len); +} +EXPORT_SYMBOL(pskb_trim_rcsum_slow); + /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate @@ -2380,20 +2399,27 @@ EXPORT_SYMBOL(skb_queue_purge); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty + * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ -void skb_rbtree_purge(struct rb_root *root) +unsigned int skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); + unsigned int sum = 0; - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + sum += skb->truesize; kfree_skb(skb); - - *root = RB_ROOT; + } + return sum; } /** diff --git a/net/core/sock.c b/net/core/sock.c index ba656a2771fb..8f2f5d497dc7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -732,6 +732,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); + sk_dst_reset(sk); break; case SO_BROADCAST: sock_valbool_flag(sk, SOCK_BROADCAST, valbool); @@ -2426,6 +2427,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = ktime_set(-1L, 0); +#if BITS_PER_LONG==32 + seqlock_init(&sk->sk_stamp_seq); +#endif #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 18bb2a42f0d1..d2caa4d69159 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -427,8 +427,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 48b28a7ecc7a..4256ac95a141 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -157,10 +157,14 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = p->parent->dst->master_netdev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c7d1adca30d8..943378d6e4c3 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -93,9 +93,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, + jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) @@ -323,6 +322,7 @@ static void hsr_announce(unsigned long data) { struct hsr_priv *hsr; struct hsr_port *master; + unsigned long interval; hsr = (struct hsr_priv *) data; @@ -337,14 +337,12 @@ static void hsr_announce(unsigned long data) } if (hsr->announce_count < 3) - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); else - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); if (is_admin_up(master->dev)) - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } @@ -477,7 +475,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - return res; + goto err_add_port; res = register_netdevice(hsr_dev); if (res) @@ -498,6 +496,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], fail: hsr_for_each_port(hsr, port) hsr_del_port(port); +err_add_port: + hsr_del_node(&hsr->self_node_db); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index bace124d14ef..46415839e67e 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } +void hsr_del_node(struct list_head *self_node_db) +{ + struct hsr_node *node; + + rcu_read_lock(); + node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + rcu_read_unlock(); + if (node) { + list_del_rcu(&node->mac_list); + kfree(node); + } +} /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; * seq_out is used to initialize filtering of outgoing duplicate frames diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 438b40f98f5a..7a8f4e98f515 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -16,6 +16,7 @@ struct hsr_node; +void hsr_del_node(struct list_head *self_node_db); struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b4e17a7c0df0..fdbebe51446f 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result; #define LOWPAN_DISPATCH_FRAG1 0xc0 #define LOWPAN_DISPATCH_FRAGN 0xe0 -struct lowpan_create_arg { +struct frag_lowpan_compare_key { u16 tag; u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; + struct ieee802154_addr src; + struct ieee802154_addr dst; }; -/* Equivalent of ipv4 struct ip +/* Equivalent of ipv4 struct ipq */ struct lowpan_frag_queue { struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; }; -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - /* private device info */ struct lowpan_dev_info { struct net_device *wdev; /* wpan device ptr */ diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 12e8cf4bda9f..6183730d38db 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct net_device *ldev); -static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, - const struct ieee802154_addr *saddr, - const struct ieee802154_addr *daddr) -{ - net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); - return jhash_3words(ieee802154_addr_hash(saddr), - ieee802154_addr_hash(daddr), - (__force u32)(tag + (d_size << 16)), - lowpan_frags.rnd); -} - -static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) -{ - const struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); - return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); -} - -static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct lowpan_frag_queue *fq; - const struct lowpan_create_arg *arg = a; - - fq = container_of(q, struct lowpan_frag_queue, q); - return fq->tag == arg->tag && fq->d_size == arg->d_size && - ieee802154_addr_equal(&fq->saddr, arg->src) && - ieee802154_addr_equal(&fq->daddr, arg->dst); -} - static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { - const struct lowpan_create_arg *arg = a; + const struct frag_lowpan_compare_key *key = a; struct lowpan_frag_queue *fq; fq = container_of(q, struct lowpan_frag_queue, q); - fq->tag = arg->tag; - fq->d_size = arg->d_size; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; + BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); + memcpy(&q->key, key, sizeof(*key)); } static void lowpan_frag_expire(unsigned long data) @@ -93,10 +61,10 @@ static void lowpan_frag_expire(unsigned long data) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); } static inline struct lowpan_frag_queue * @@ -104,25 +72,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { - struct inet_frag_queue *q; - struct lowpan_create_arg arg; - unsigned int hash; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + struct frag_lowpan_compare_key key = {}; + struct inet_frag_queue *q; - arg.tag = cb->d_tag; - arg.d_size = cb->d_size; - arg.src = src; - arg.dst = dst; + key.tag = cb->d_tag; + key.d_size = cb->d_size; + key.src = *src; + key.dst = *dst; - hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); - - q = inet_frag_find(&ieee802154_lowpan->frags, - &lowpan_frags, &arg, hash); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + q = inet_frag_find(&ieee802154_lowpan->frags, &key); + if (!q) return NULL; - } + return container_of(q, struct lowpan_frag_queue, q); } @@ -229,7 +192,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct sk_buff *fp, *head = fq->q.fragments; int sum_truesize; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); /* Make the one we just received the head. */ if (prev) { @@ -408,7 +371,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); - struct ieee802154_hdr hdr; + struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) @@ -437,7 +400,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) ret = lowpan_frag_queue(fq, skb, frag_type); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); return ret; } @@ -447,24 +410,22 @@ err: } #ifdef CONFIG_SYSCTL -static int zero; static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .data = &init_net.ieee802154_lowpan.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", .data = &init_net.ieee802154_lowpan.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { @@ -580,14 +541,20 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->frags.f = &lowpan_frags; - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&ieee802154_lowpan->frags); + if (res < 0) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&ieee802154_lowpan->frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) @@ -596,7 +563,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) net_ieee802154_lowpan(net); lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags); } static struct pernet_operations lowpan_frags_ops = { @@ -604,33 +571,64 @@ static struct pernet_operations lowpan_frags_ops = { .exit = lowpan_frags_exit_net, }; +static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_lowpan_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +static const struct rhashtable_params lowpan_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = lowpan_key_hashfn, + .obj_hashfn = lowpan_obj_hashfn, + .obj_cmpfn = lowpan_obj_cmpfn, + .automatic_shrinking = true, +}; + int __init lowpan_net_frag_init(void) { int ret; - ret = lowpan_frags_sysctl_register(); - if (ret) - return ret; - - ret = register_pernet_subsys(&lowpan_frags_ops); - if (ret) - goto err_pernet; - - lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; lowpan_frags.skb_free = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); - lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; lowpan_frags.frags_cache_name = lowpan_frags_cache_name; + lowpan_frags.rhash_params = lowpan_rhash_params; ret = inet_frags_init(&lowpan_frags); if (ret) - goto err_pernet; + goto out; + ret = lowpan_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&lowpan_frags_ops); + if (ret) + goto err_pernet; +out: return ret; err_pernet: lowpan_frags_sysctl_unregister(); +err_sysctl: + inet_frags_fini(&lowpan_frags); return ret; } diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index a10db45b2e1e..df32134da924 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -55,6 +55,9 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev, const u8 *daddr = _daddr; struct lowpan_addr_info *info; + if (!daddr) + return -EINVAL; + /* TODO: * if this package isn't ipv6 one, where should it be routed? */ diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index cfaacaa023e6..7fe643062013 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -167,7 +167,8 @@ static int cipso_v4_bitmap_walk(const unsigned char *bitmap, (state == 0 && (byte & bitmask) == 0)) return bit_spot; - bit_spot++; + if (++bit_spot >= bitmap_len) + return -1; bitmask >>= 1; if (bitmask == 0) { byte = bitmap[++byte_offset]; @@ -737,7 +738,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + if ((level < doi_def->map.std->lvl.cipso_size) && + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } @@ -1805,13 +1807,26 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 860e33dd4030..8dc9073d4a76 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -187,7 +187,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(tb); + flushed += fib_table_flush(tb, false); } if (flushed) @@ -1278,7 +1278,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(tb); + fib_table_flush(tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5c598f99a500..fdaa905dccdd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1806,7 +1806,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) +int fib_table_flush(struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1850,7 +1850,17 @@ int fib_table_flush(struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { + if (!fi || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index d83888bc33d3..b5a137338e50 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -116,6 +116,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -173,13 +174,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ef2d4322aba7..a51f0dd6a49e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -567,7 +567,8 @@ relookup_failed: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -681,7 +682,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) + if (__ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -733,7 +734,7 @@ out_free: kfree(icmp_param); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 51512742f21f..9ec181c077b6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -787,7 +787,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, tcp_sk(child)->fastopen_rsk = NULL; } inet_csk_destroy_sock(child); - reqsk_put(req); } struct sock *inet_csk_reqsk_queue_add(struct sock *sk, @@ -858,6 +857,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_hold(child); inet_child_forget(sk, req, child); + reqsk_put(req); bh_unlock_sock(child); local_bh_enable(); sock_put(child); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b2001b20e029..c03e5f5859e1 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,12 +25,6 @@ #include #include -#define INETFRAGS_EVICT_BUCKETS 128 -#define INETFRAGS_EVICT_MAX 512 - -/* don't rebuild inetfrag table with new secret more often than this */ -#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) - /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); -static unsigned int -inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) -{ - return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); -} - -static bool inet_frag_may_rebuild(struct inet_frags *f) -{ - return time_after(jiffies, - f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); -} - -static void inet_frag_secret_rebuild(struct inet_frags *f) -{ - int i; - - write_seqlock_bh(&f->rnd_seqlock); - - if (!inet_frag_may_rebuild(f)) - goto out; - - get_random_bytes(&f->rnd, sizeof(u32)); - - for (i = 0; i < INETFRAGS_HASHSZ; i++) { - struct inet_frag_bucket *hb; - struct inet_frag_queue *q; - struct hlist_node *n; - - hb = &f->hash[i]; - spin_lock(&hb->chain_lock); - - hlist_for_each_entry_safe(q, n, &hb->chain, list) { - unsigned int hval = inet_frag_hashfn(f, q); - - if (hval != i) { - struct inet_frag_bucket *hb_dest; - - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hb_dest = &f->hash[hval]; - - /* This is the only place where we take - * another chain_lock while already holding - * one. As this will not run concurrently, - * we cannot deadlock on hb_dest lock below, if its - * already locked it will be released soon since - * other caller cannot be waiting for hb lock - * that we've taken above. - */ - spin_lock_nested(&hb_dest->chain_lock, - SINGLE_DEPTH_NESTING); - hlist_add_head(&q->list, &hb_dest->chain); - spin_unlock(&hb_dest->chain_lock); - } - } - spin_unlock(&hb->chain_lock); - } - - f->rebuild = false; - f->last_rebuild_jiffies = jiffies; -out: - write_sequnlock_bh(&f->rnd_seqlock); -} - -static bool inet_fragq_should_evict(const struct inet_frag_queue *q) -{ - if (!hlist_unhashed(&q->list_evictor)) - return false; - - return q->net->low_thresh == 0 || - frag_mem_limit(q->net) >= q->net->low_thresh; -} - -static unsigned int -inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) -{ - struct inet_frag_queue *fq; - struct hlist_node *n; - unsigned int evicted = 0; - HLIST_HEAD(expired); - - spin_lock(&hb->chain_lock); - - hlist_for_each_entry_safe(fq, n, &hb->chain, list) { - if (!inet_fragq_should_evict(fq)) - continue; - - if (!del_timer(&fq->timer)) - continue; - - hlist_add_head(&fq->list_evictor, &expired); - ++evicted; - } - - spin_unlock(&hb->chain_lock); - - hlist_for_each_entry_safe(fq, n, &expired, list_evictor) - f->frag_expire((unsigned long) fq); - - return evicted; -} - -static void inet_frag_worker(struct work_struct *work) -{ - unsigned int budget = INETFRAGS_EVICT_BUCKETS; - unsigned int i, evicted = 0; - struct inet_frags *f; - - f = container_of(work, struct inet_frags, frags_work); - - BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); - - local_bh_disable(); - - for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { - evicted += inet_evict_bucket(f, &f->hash[i]); - i = (i + 1) & (INETFRAGS_HASHSZ - 1); - if (evicted > INETFRAGS_EVICT_MAX) - break; - } - - f->next_bucket = i; - - local_bh_enable(); - - if (f->rebuild && inet_frag_may_rebuild(f)) - inet_frag_secret_rebuild(f); -} - -static void inet_frag_schedule_worker(struct inet_frags *f) -{ - if (unlikely(!work_pending(&f->frags_work))) - schedule_work(&f->frags_work); -} - int inet_frags_init(struct inet_frags *f) { - int i; - - INIT_WORK(&f->frags_work, inet_frag_worker); - - for (i = 0; i < INETFRAGS_HASHSZ; i++) { - struct inet_frag_bucket *hb = &f->hash[i]; - - spin_lock_init(&hb->chain_lock); - INIT_HLIST_HEAD(&hb->chain); - } - - seqlock_init(&f->rnd_seqlock); - f->last_rebuild_jiffies = 0; f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, NULL); if (!f->frags_cachep) @@ -214,73 +59,53 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_fini(struct inet_frags *f) { - cancel_work_sync(&f->frags_work); + /* We must wait that all inet_frag_destroy_rcu() have completed. */ + rcu_barrier(); + kmem_cache_destroy(f->frags_cachep); + f->frags_cachep = NULL; } EXPORT_SYMBOL(inet_frags_fini); -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) +static void inet_frags_free_cb(void *ptr, void *arg) { - unsigned int seq; - int i; + struct inet_frag_queue *fq = ptr; - nf->low_thresh = 0; + /* If we can not cancel the timer, it means this frag_queue + * is already disappearing, we have nothing to do. + * Otherwise, we own a refcount until the end of this function. + */ + if (!del_timer(&fq->timer)) + return; -evict_again: - local_bh_disable(); - seq = read_seqbegin(&f->rnd_seqlock); + spin_lock_bh(&fq->lock); + if (!(fq->flags & INET_FRAG_COMPLETE)) { + fq->flags |= INET_FRAG_COMPLETE; + atomic_dec(&fq->refcnt); + } + spin_unlock_bh(&fq->lock); - for (i = 0; i < INETFRAGS_HASHSZ ; i++) - inet_evict_bucket(f, &f->hash[i]); + inet_frag_put(fq); +} - local_bh_enable(); - cond_resched(); +void inet_frags_exit_net(struct netns_frags *nf) +{ + nf->high_thresh = 0; /* prevent creation of new frags */ - if (read_seqretry(&f->rnd_seqlock, seq) || - sum_frag_mem_limit(nf)) - goto evict_again; + rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); } EXPORT_SYMBOL(inet_frags_exit_net); -static struct inet_frag_bucket * -get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) -__acquires(hb->chain_lock) -{ - struct inet_frag_bucket *hb; - unsigned int seq, hash; - - restart: - seq = read_seqbegin(&f->rnd_seqlock); - - hash = inet_frag_hashfn(f, fq); - hb = &f->hash[hash]; - - spin_lock(&hb->chain_lock); - if (read_seqretry(&f->rnd_seqlock, seq)) { - spin_unlock(&hb->chain_lock); - goto restart; - } - - return hb; -} - -static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) -{ - struct inet_frag_bucket *hb; - - hb = get_frag_bucket_locked(fq, f); - hlist_del(&fq->list); - fq->flags |= INET_FRAG_COMPLETE; - spin_unlock(&hb->chain_lock); -} - -void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) +void inet_frag_kill(struct inet_frag_queue *fq) { if (del_timer(&fq->timer)) atomic_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { - fq_unlink(fq, f); + struct netns_frags *nf = fq->net; + + fq->flags |= INET_FRAG_COMPLETE; + rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); atomic_dec(&fq->refcnt); } } @@ -294,11 +119,23 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, kfree_skb(skb); } -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) +static void inet_frag_destroy_rcu(struct rcu_head *head) +{ + struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, + rcu); + struct inet_frags *f = q->net->f; + + if (f->destructor) + f->destructor(q); + kmem_cache_free(f->frags_cachep, q); +} + +void inet_frag_destroy(struct inet_frag_queue *q) { struct sk_buff *fp; struct netns_frags *nf; unsigned int sum, sum_truesize = 0; + struct inet_frags *f; WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -306,64 +143,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) /* Release all fragment data. */ fp = q->fragments; nf = q->net; - while (fp) { - struct sk_buff *xp = fp->next; + f = nf->f; + if (fp) { + do { + struct sk_buff *xp = fp->next; - sum_truesize += fp->truesize; - frag_kfree_skb(nf, f, fp); - fp = xp; + sum_truesize += fp->truesize; + frag_kfree_skb(nf, f, fp); + fp = xp; + } while (fp); + } else { + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); } sum = sum_truesize + f->qsize; - if (f->destructor) - f->destructor(q); - kmem_cache_free(f->frags_cachep, q); + call_rcu(&q->rcu, inet_frag_destroy_rcu); sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); -static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, - struct inet_frag_queue *qp_in, - struct inet_frags *f, - void *arg) -{ - struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); - struct inet_frag_queue *qp; - -#ifdef CONFIG_SMP - /* With SMP race we have to recheck hash table, because - * such entry could have been created on other cpu before - * we acquired hash bucket lock. - */ - hlist_for_each_entry(qp, &hb->chain, list) { - if (qp->net == nf && f->match(qp, arg)) { - atomic_inc(&qp->refcnt); - spin_unlock(&hb->chain_lock); - qp_in->flags |= INET_FRAG_COMPLETE; - inet_frag_put(qp_in, f); - return qp; - } - } -#endif - qp = qp_in; - if (!mod_timer(&qp->timer, jiffies + nf->timeout)) - atomic_inc(&qp->refcnt); - - atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &hb->chain); - - spin_unlock(&hb->chain_lock); - - return qp; -} - static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, struct inet_frags *f, void *arg) { struct inet_frag_queue *q; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) + return NULL; + q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -374,75 +182,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); - atomic_set(&q->refcnt, 1); + atomic_set(&q->refcnt, 3); return q; } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, - void *arg) + void *arg, + struct inet_frag_queue **prev) { + struct inet_frags *f = nf->f; struct inet_frag_queue *q; q = inet_frag_alloc(nf, f, arg); - if (!q) + if (!q) { + *prev = ERR_PTR(-ENOMEM); return NULL; + } + mod_timer(&q->timer, jiffies + nf->timeout); - return inet_frag_intern(nf, q, f, arg); + *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + &q->node, f->rhash_params); + if (*prev) { + q->flags |= INET_FRAG_COMPLETE; + inet_frag_kill(q); + inet_frag_destroy(q); + return NULL; + } + return q; } +EXPORT_SYMBOL(inet_frag_create); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, - unsigned int hash) +/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { - struct inet_frag_bucket *hb; - struct inet_frag_queue *q; - int depth = 0; + struct inet_frag_queue *fq = NULL, *prev; - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { - inet_frag_schedule_worker(f); - return NULL; + rcu_read_lock(); + prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (!prev) + fq = inet_frag_create(nf, key, &prev); + if (prev && !IS_ERR(prev)) { + fq = prev; + if (!atomic_inc_not_zero(&fq->refcnt)) + fq = NULL; } - - if (frag_mem_limit(nf) > nf->low_thresh) - inet_frag_schedule_worker(f); - - hash &= (INETFRAGS_HASHSZ - 1); - hb = &f->hash[hash]; - - spin_lock(&hb->chain_lock); - hlist_for_each_entry(q, &hb->chain, list) { - if (q->net == nf && f->match(q, key)) { - atomic_inc(&q->refcnt); - spin_unlock(&hb->chain_lock); - return q; - } - depth++; - } - spin_unlock(&hb->chain_lock); - - if (depth <= INETFRAGS_MAXDEPTH) - return inet_frag_create(nf, f, key); - - if (inet_frag_may_rebuild(f)) { - if (!f->rebuild) - f->rebuild = true; - inet_frag_schedule_worker(f); - } - - return ERR_PTR(-ENOBUFS); + rcu_read_unlock(); + return fq; } EXPORT_SYMBOL(inet_frag_find); - -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix) -{ - static const char msg[] = "inet_frag_find: Fragment hash bucket" - " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) - ". Dropping fragment.\n"; - - if (PTR_ERR(q) == -ENOBUFS) - net_dbg_ratelimited("%s%s", prefix, msg); -} -EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86fa45809540..0c5862914f05 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -448,6 +448,7 @@ relookup: atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 72915658a6b1..9b09a9b5a4fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -58,27 +58,64 @@ static int sysctl_ipfrag_max_dist __read_mostly = 64; static const char ip_frag_cache_name[] = "ip4-frags"; -struct ipfrag_skb_cb -{ +/* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. + * + * Invariants: + * - next_frag is NULL at the tail of a "run"; + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. + */ +struct ipfrag_skb_cb { struct inet_skb_parm h; - int offset; + struct sk_buff *next_frag; + int frag_run_len; }; -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) + +static void ip4_frag_init_run(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); + + FRAG_CB(skb)->next_frag = NULL; + FRAG_CB(skb)->frag_run_len = skb->len; +} + +/* Append skb to the last "run". */ +static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, + struct sk_buff *skb) +{ + RB_CLEAR_NODE(&skb->rbnode); + FRAG_CB(skb)->next_frag = NULL; + + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; + FRAG_CB(q->fragments_tail)->next_frag = skb; + q->fragments_tail = skb; +} + +/* Create a new "run" with the skb. */ +static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) +{ + if (q->last_run_head) + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, + &q->last_run_head->rbnode.rb_right); + else + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + + ip4_frag_init_run(skb); + q->fragments_tail = skb; + q->last_run_head = skb; +} /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; - u32 user; - __be32 saddr; - __be32 daddr; - __be16 id; - u8 protocol; u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; - int vif; /* L3 master device index */ unsigned int rid; struct inet_peer *peer; }; @@ -90,49 +127,9 @@ static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -int ip_frag_mem(struct net *net) -{ - return sum_frag_mem_limit(&net->ipv4.frags); -} +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev); - -struct ip4_create_arg { - struct iphdr *iph; - u32 user; - int vif; -}; - -static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) -{ - net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); - return jhash_3words((__force u32)id << 16 | prot, - (__force u32)saddr, (__force u32)daddr, - ip4_frags.rnd); -} - -static unsigned int ip4_hashfn(const struct inet_frag_queue *q) -{ - const struct ipq *ipq; - - ipq = container_of(q, struct ipq, q); - return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); -} - -static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct ipq *qp; - const struct ip4_create_arg *arg = a; - - qp = container_of(q, struct ipq, q); - return qp->id == arg->iph->id && - qp->saddr == arg->iph->saddr && - qp->daddr == arg->iph->daddr && - qp->protocol == arg->iph->protocol && - qp->user == arg->user && - qp->vif == arg->vif; -} static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { @@ -141,17 +138,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) frags); struct net *net = container_of(ipv4, struct net, ipv4); - const struct ip4_create_arg *arg = a; + const struct frag_v4_compare_key *key = a; - qp->protocol = arg->iph->protocol; - qp->id = arg->iph->id; - qp->ecn = ip4_frag_ecn(arg->iph->tos); - qp->saddr = arg->iph->saddr; - qp->daddr = arg->iph->daddr; - qp->vif = arg->vif; - qp->user = arg->user; + q->key.v4 = *key; + qp->ecn = 0; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : + inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : NULL; } @@ -169,7 +161,7 @@ static void ip4_frag_free(struct inet_frag_queue *q) static void ipq_put(struct ipq *ipq) { - inet_frag_put(&ipq->q, &ip4_frags); + inet_frag_put(&ipq->q); } /* Kill ipq entry. It is not destroyed immediately, @@ -177,7 +169,7 @@ static void ipq_put(struct ipq *ipq) */ static void ipq_kill(struct ipq *ipq) { - inet_frag_kill(&ipq->q, &ip4_frags); + inet_frag_kill(&ipq->q); } static bool frag_expire_skip_icmp(u32 user) @@ -194,8 +186,11 @@ static bool frag_expire_skip_icmp(u32 user) */ static void ip_expire(unsigned long arg) { - struct ipq *qp; + const struct iphdr *iph; + struct sk_buff *head = NULL; struct net *net; + struct ipq *qp; + int err; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); @@ -208,51 +203,65 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); - if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *clone, *head = qp->q.fragments; - const struct iphdr *iph; - int err; + if (!(qp->q.flags & INET_FRAG_FIRST_IN)) + goto out; - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); - - if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) + /* sk_buff::dev and sk_buff::rbnode are unionized. So we + * pull the head out of the tree in order to be able to + * deal with head->dev. + */ + if (qp->q.fragments) { + head = qp->q.fragments; + qp->q.fragments = head->next; + } else { + head = skb_rb_first(&qp->q.rb_fragments); + if (!head) goto out; - - head->dev = dev_get_by_index_rcu(net, qp->iif); - if (!head->dev) - goto out; - - - /* skb has no dst, perform route lookup again */ - iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); - if (err) - goto out; - - /* Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (frag_expire_skip_icmp(qp->user) && - (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out; - - clone = skb_clone(head, GFP_ATOMIC); - - /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if (clone) { - spin_unlock(&qp->q.lock); - icmp_send(clone, ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); - consume_skb(clone); - goto out_rcu_unlock; - } + if (FRAG_CB(head)->next_frag) + rb_replace_node(&head->rbnode, + &FRAG_CB(head)->next_frag->rbnode, + &qp->q.rb_fragments); + else + rb_erase(&head->rbnode, &qp->q.rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); } + if (head == qp->q.fragments_tail) + qp->q.fragments_tail = NULL; + + sub_frag_mem_limit(qp->q.net, head->truesize); + + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out; + + + /* skb has no dst, perform route lookup again */ + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (err) + goto out; + + /* Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (frag_expire_skip_icmp(qp->q.key.v4.user) && + (skb_rtable(head)->rt_type != RTN_LOCAL)) + goto out; + + spin_unlock(&qp->q.lock); + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + goto out_rcu_unlock; + out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); + if (head) + kfree_skb(head); ipq_put(qp); } @@ -262,21 +271,20 @@ out_rcu_unlock: static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user, int vif) { + struct frag_v4_compare_key key = { + .saddr = iph->saddr, + .daddr = iph->daddr, + .user = user, + .vif = vif, + .id = iph->id, + .protocol = iph->protocol, + }; struct inet_frag_queue *q; - struct ip4_create_arg arg; - unsigned int hash; - arg.iph = iph; - arg.user = user; - arg.vif = vif; - - hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - - q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + q = inet_frag_find(&net->ipv4.frags, &key); + if (!q) return NULL; - } + return container_of(q, struct ipq, q); } @@ -296,7 +304,7 @@ static int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->q.fragments && (end - start) > max; + rc = qp->q.fragments_tail && (end - start) > max; if (rc) { struct net *net; @@ -310,7 +318,6 @@ static int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { - struct sk_buff *fp; unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { @@ -318,21 +325,16 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - fp = qp->q.fragments; - do { - struct sk_buff *xp = fp->next; - - sum_truesize += fp->truesize; - kfree_skb(fp); - fp = xp; - } while (fp); + sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; @@ -342,11 +344,13 @@ static int ip_frag_reinit(struct ipq *qp) /* Add new segment to existing queue. */ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { - struct sk_buff *prev, *next; + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct rb_node **rbn, *parent; + struct sk_buff *skb1, *prev_tail; + int ihl, end, skb1_run_end; struct net_device *dev; unsigned int fragsize; int flags, offset; - int ihl, end; int err = -ENOENT; u8 ecn; @@ -405,94 +409,68 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (err) goto err; - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = qp->q.fragments_tail; - if (!prev || FRAG_CB(prev)->offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = qp->q.fragments; next != NULL; next = next->next) { - if (FRAG_CB(next)->offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (FRAG_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - err = -EINVAL; - if (end <= offset) - goto err; - err = -ENOMEM; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - err = -ENOMEM; - - while (next && FRAG_CB(next)->offset < end) { - int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG_CB(next)->offset += i; - qp->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - qp->q.fragments = next; - - qp->q.meat -= free_it->len; - sub_frag_mem_limit(qp->q.net, free_it->truesize); - kfree_skb(free_it); - } - } - - FRAG_CB(skb)->offset = offset; - - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - qp->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - qp->q.fragments = skb; - + /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; - if (dev) { - qp->iif = dev->ifindex; - skb->dev = NULL; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); + + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. + * + * We do the same here for IPv4 (and increment an snmp counter) but + * we do not want to drop the whole queue in response to a duplicate + * fragment. + */ + + err = -EINVAL; + /* Find out where to put this fragment. */ + prev_tail = qp->q.fragments_tail; + if (!prev_tail) + ip4_frag_create_run(&qp->q, skb); /* First fragment. */ + else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. */ + if (offset < prev_tail->ip_defrag_offset + prev_tail->len) + goto discard_qp; + if (offset == prev_tail->ip_defrag_offset + prev_tail->len) + ip4_frag_append_to_last_run(&qp->q, skb); + else + ip4_frag_create_run(&qp->q, skb); + } else { + /* Binary search. Note that skb can become the first fragment, + * but not the last (covered above). + */ + rbn = &qp->q.rb_fragments.rb_node; + do { + parent = *rbn; + skb1 = rb_to_skb(parent); + skb1_run_end = skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len; + if (end <= skb1->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= skb1_run_end) + rbn = &parent->rb_right; + else if (offset >= skb1->ip_defrag_offset && + end <= skb1_run_end) + goto err; /* No new data, potential duplicate */ + else + goto discard_qp; /* Found an overlap */ + } while (*rbn); + /* Here we have parent properly set, and rbn pointing to + * one of its NULL left/right children. Insert skb. + */ + ip4_frag_init_run(skb); + rb_link_node(&skb->rbnode, parent, rbn); + rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); } + + if (dev) + qp->iif = dev->ifindex; + skb->ip_defrag_offset = offset; + qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; @@ -514,7 +492,7 @@ found: unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, prev, dev); + err = ip_frag_reasm(qp, skb, prev_tail, dev); skb->_skb_refdst = orefdst; return err; } @@ -522,20 +500,23 @@ found: skb_dst_drop(skb); return -EINPROGRESS; +discard_qp: + inet_frag_kill(&qp->q); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS); err: kfree_skb(skb); return err; } - /* Build a new IP datagram from all its fragments. */ - -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev) +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; - struct sk_buff *fp, *head = qp->q.fragments; + struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); + struct sk_buff **nextp; /* To build frag_list. */ + struct rb_node *rbn; int len; int ihlen; int err; @@ -549,26 +530,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_fail; } /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); if (!fp) goto out_nomem; - - fp->next = head->next; - if (!fp->next) + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(prev_tail)->next_frag = fp; + else + rb_replace_node(&skb->rbnode, &fp->rbnode, + &qp->q.rb_fragments); + if (qp->q.fragments_tail == skb) qp->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, qp->q.fragments); - head->next = qp->q.fragments->next; - - consume_skb(qp->q.fragments); - qp->q.fragments = head; + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, + &qp->q.rb_fragments); + consume_skb(head); + head = skb; } - WARN_ON(!head); - WARN_ON(FRAG_CB(head)->offset != 0); + WARN_ON(head->ip_defrag_offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); @@ -592,35 +574,61 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) goto out_nomem; - clone->next = head->next; - head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; + head->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; add_frag_mem_limit(qp->q.net, clone->truesize); + skb_shinfo(head)->frag_list = clone; + nextp = &clone->next; + } else { + nextp = &skb_shinfo(head)->frag_list; } - skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - for (fp=head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; + /* Traverse the tree in order, to build frag_list. */ + fp = FRAG_CB(head)->next_frag; + rbn = rb_next(&head->rbnode); + rb_erase(&head->rbnode, &qp->q.rb_fragments); + while (rbn || fp) { + /* fp points to the next sk_buff in the current run; + * rbn points to the next run. + */ + /* Go through the current run. */ + while (fp) { + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + fp = FRAG_CB(fp)->next_frag; + } + /* Move to the next run. */ + if (rbn) { + struct rb_node *rbnext = rb_next(rbn); + + fp = rb_to_skb(rbn); + rb_erase(rbn, &qp->q.rb_fragments); + rbn = rbnext; + } } sub_frag_mem_limit(qp->q.net, head->truesize); + *nextp = NULL; head->next = NULL; + head->prev = NULL; head->dev = dev; head->tstamp = qp->q.stamp; IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); @@ -648,7 +656,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; return 0; out_nomem: @@ -656,7 +666,7 @@ out_nomem: err = -ENOMEM; goto out_fail; out_oversize: - net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); + net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); out_fail: IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; @@ -734,25 +744,46 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_check_defrag); +unsigned int inet_frag_rbtree_purge(struct rb_root *root) +{ + struct rb_node *p = rb_first(root); + unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + while (skb) { + struct sk_buff *next = FRAG_CB(skb)->next_frag; + + sum += skb->truesize; + kfree_skb(skb); + skb = next; + } + } + return sum; +} +EXPORT_SYMBOL(inet_frag_rbtree_purge); + #ifdef CONFIG_SYSCTL -static int zero; +static int dist_min; static struct ctl_table ip4_frags_ns_ctl_table[] = { { .procname = "ipfrag_high_thresh", .data = &init_net.ipv4.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ipv4.frags.low_thresh }, { .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, .extra2 = &init_net.ipv4.frags.high_thresh }, { @@ -781,7 +812,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &dist_min, }, { } }; @@ -853,6 +884,8 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { + int res; + /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -876,15 +909,21 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; - inet_frags_init_net(&net->ipv4.frags); + net->ipv4.frags.f = &ip4_frags; - return ip4_frags_ns_ctl_register(net); + res = inet_frags_init_net(&net->ipv4.frags); + if (res < 0) + return res; + res = ip4_frags_ns_ctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->ipv4.frags); + return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); - inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); + inet_frags_exit_net(&net->ipv4.frags); } static struct pernet_operations ip4_frags_ops = { @@ -892,18 +931,50 @@ static struct pernet_operations ip4_frags_ops = { .exit = ipv4_frags_exit_net, }; + +static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); +} + +static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key.v4, + sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); +} + +static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_v4_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +static const struct rhashtable_params ip4_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .key_offset = offsetof(struct inet_frag_queue, key), + .key_len = sizeof(struct frag_v4_compare_key), + .hashfn = ip4_key_hashfn, + .obj_hashfn = ip4_obj_hashfn, + .obj_cmpfn = ip4_obj_cmpfn, + .automatic_shrinking = true, +}; + void __init ipfrag_init(void) { - ip4_frags_ctl_register(); - register_pernet_subsys(&ip4_frags_ops); - ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); - ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; ip4_frags.frags_cache_name = ip_frag_cache_name; + ip4_frags.rhash_params = ip4_rhash_params; if (inet_frags_init(&ip4_frags)) panic("IP: failed to allocate ip4_frags cache\n"); + ip4_frags_ctl_register(); + register_pernet_subsys(&ip4_frags_ops); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b1209b63381f..eb1834f2682f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -444,6 +444,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index bd246792360b..d3922a93e4c1 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -254,8 +254,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct net *net, - struct ip_options *opt, struct sk_buff *skb) +int __ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb, + __be32 *info) { __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; @@ -472,11 +473,22 @@ eol: return 0; error: - if (skb) { - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); - } + if (info) + *info = htonl((pp_ptr-iph)<<24); return -EINVAL; } + +int ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb) +{ + int ret; + __be32 info; + + ret = __ip_options_compile(net, opt, skb, &info); + if (ret != 0 && skb) + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); + return ret; +} EXPORT_SYMBOL(ip_options_compile); /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f8caf7d19b8..1ea36bf778e6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -133,19 +133,17 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { + __be16 _ports[2], *ports; struct sockaddr_in sin; - __be16 *ports; - int end; - - end = skb_transport_offset(skb) + 4; - if (end > 0 && !pskb_may_pull(skb, end)) - return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (!ports) + return; sin.sin_family = AF_INET; sin.sin_addr.s_addr = ip_hdr(skb)->daddr; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4b7c81f88abf..fcf327ebd134 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -75,6 +75,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -83,6 +110,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -409,6 +444,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -592,6 +633,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -601,6 +649,8 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8e77786549c6..1cb865fcc91b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -66,6 +66,7 @@ #include #include #include +#include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 @@ -1574,6 +1575,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f51b32ed353c..cbe630aab44a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -983,6 +983,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, sizeof(struct arpt_get_entries) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1557,6 +1558,7 @@ static int compat_get_entries(struct net *net, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 8adb6e9ba8f5..53d664a7774c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1171,6 +1171,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1799,6 +1800,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3abd9d7a3adf..b001ad668108 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -52,7 +52,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - unsigned int frag_mem; int orphans, sockets; local_bh_disable(); @@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); - frag_mem = ip_frag_mem(net); - seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); + seq_printf(seq, "FRAG: inuse %u memory %lu\n", + atomic_read(&net->ipv4.frags.rhashtable.nelems), + frag_mem_limit(&net->ipv4.frags)); return 0; } @@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), + SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 75406603fa1e..773c71771ffd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -880,13 +880,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -903,6 +905,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) @@ -1167,9 +1170,23 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { + struct ip_options opt; struct rtable *rt; + int res; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); rt = skb_rtable(skb); if (rt) @@ -1607,6 +1624,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + /* set fnhe_daddr to 0 to ensure it won't bind with + * new dsts in rt_bind_exception(). + */ + fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 57bbcd5b650a..4cebe913a0b3 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -228,7 +228,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, if (child) { atomic_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); - inet_csk_reqsk_queue_add(sk, req, child); + if (!inet_csk_reqsk_queue_add(sk, req, child)) { + bh_unlock_sock(child); + sock_put(child); + child = NULL; + reqsk_put(req); + } } else { reqsk_free(req); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3c4a83ad687..e9f8443d3b8a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2253,7 +2253,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; - icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; tp->packets_out = 0; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 62f90f6b7a9d..7ccbbd0d3e43 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -66,11 +66,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) @@ -211,21 +206,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } +static void dctcp_react_to_loss(struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + static void dctcp_state(struct sock *sk, u8 new_state) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); - - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -237,6 +234,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ce_state_1_to_0(sk); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 48c774c0acda..a369344d0163 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -366,11 +366,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -383,8 +384,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } @@ -6404,7 +6404,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->send_synack(fastopen_sk, dst, &fl, req, &foc, false); /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, fastopen_sk); + if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { + reqsk_fastopen_remove(fastopen_sk, req, false); + bh_unlock_sock(fastopen_sk); + sock_put(fastopen_sk); + reqsk_put(req); + goto drop; + } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a8c8b081b80..a32818f086bd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -466,14 +466,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sock_owned_by_user(sk)) break; + skb = tcp_write_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + break; + icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - skb = tcp_write_queue_head(sk); - BUG_ON(!skb); - remaining = icsk->icsk_rto - min(icsk->icsk_rto, tcp_time_stamp - tcp_skb_timestamp(skb)); @@ -713,8 +714,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(const struct sock *sk, - struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, + u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -728,8 +729,8 @@ static void tcp_v4_send_ack(const struct sock *sk, #endif ]; } rep; - struct net *net = sock_net(sk); struct ip_reply_arg arg; + struct net *net = sock_net(sk); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -792,8 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sk, skb, - tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -812,17 +812,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : - tcp_sk(sk)->snd_nxt; - - /* RFC 7323 2.3 - * The window field (SEG.WND) of every outgoing segment, with the - * exception of segments, MUST be right-shifted by - * Rcv.Wind.Shift bits: - */ - tcp_v4_send_ack(sk, skb, seq, - tcp_rsk(req)->rcv_nxt, - req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_v4_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, 0, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f3952c75c47a..9ada5e4a28e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2263,14 +2263,18 @@ void tcp_send_loss_probe(struct sock *sk) skb = tcp_write_queue_tail(sk); } + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } + /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4c61b3ecfb6e..1fa06f897228 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1464,7 +1464,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7e0fe4bdd967..feb50a16398d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3b3efbda48e1..78766b32b78b 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -50,7 +50,7 @@ struct proto udplite_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ef222a4bc1a5..0ffd555151be 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1070,7 +1070,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e1fe8d227ef1..28e03fce9e89 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -308,6 +308,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket @@ -318,9 +319,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + rcu_read_lock(); + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out_unlock; + } + } + /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; - chk_addr_ret = inet_addr_type(net, v4addr); + chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); + rcu_read_unlock(); + if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && @@ -349,6 +361,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out_unlock; } + } + + if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fc69e03451a7..54fc09409532 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -291,6 +291,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) skb_reset_network_header(skb); iph = ipv6_hdr(skb); iph->daddr = fl6->daddr; + ip6_flow_hdr(iph, 0, 0); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -658,17 +659,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports; - int end; + __be16 _ports[2], *ports; - end = skb_transport_offset(skb) + 4; - if (end <= 0 || pskb_may_pull(skb, end)) { + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (ports) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); - sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ba923a5504b9..3544d5e84b20 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -170,37 +170,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; - if (opt) { - unsigned int head_room; + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + if (opt) + head_room += opt->opt_nflen + opt->opt_flen; - /* First: exthdrs may take lots of space (~8K for now) - MAX_HEADER is not enough. - */ - head_room = opt->opt_nflen + opt->opt_flen; - seg_len += head_room; - head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - - if (skb_headroom(skb) < head_room) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -ENOBUFS; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; + if (unlikely(skb_headroom(skb) < head_room)) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); + if (!skb2) { + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + if (opt) { + seg_len += opt->opt_nflen + opt->opt_flen; + if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); + if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); } @@ -576,7 +576,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -587,6 +587,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -621,6 +622,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8d55abb1a689..40cf35c9a2f3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -622,7 +622,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || - rt->dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -632,7 +632,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 14dacf1df529..30b03d8e321a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -15,7 +15,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - struct sockaddr_in6 udp6_addr; + struct sockaddr_in6 udp6_addr = {}; int err; struct socket *sock = NULL; @@ -42,6 +42,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->peer_udp_port) { + memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, sizeof(udp6_addr.sin6_addr)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9b92960f024d..e348a140e540 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -72,6 +72,8 @@ struct mr6_table { #endif }; +#include + struct ip6mr_rule { struct fib_rule common; }; @@ -1871,6 +1873,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif6_table[vr.mifi]; if (MIF_EXISTS(mrt, vr.mifi)) { @@ -1945,6 +1948,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif6_table[vr.mifi]; if (MIF_EXISTS(mrt, vr.mifi)) { @@ -1986,10 +1990,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 96de322fe5e2..f563cf3fcc4c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1182,6 +1182,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1800,6 +1801,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5a9ae56e7868..664c84e47bab 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -64,7 +64,6 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL -static int zero; static struct ctl_table nf_ct_frag6_sysctl_table[] = { { @@ -77,18 +76,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_low_thresh", .data = &init_net.nf_frag.frags.low_thresh, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", .data = &init_net.nf_frag.frags.high_thresh, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.nf_frag.frags.low_thresh }, { } @@ -153,23 +151,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } -static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); - return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, nf_frags.rnd); -} - - -static unsigned int nf_hashfn(const struct inet_frag_queue *q) -{ - const struct frag_queue *nq; - - nq = container_of(q, struct frag_queue, q); - return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); -} - static void nf_skb_free(struct sk_buff *skb) { if (NFCT_FRAG6_CB(skb)->orig) @@ -184,34 +165,26 @@ static void nf_ct_frag6_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); - ip6_expire_frag_queue(net, fq, &nf_frags); + ip6_expire_frag_queue(net, fq); } /* Creation primitives. */ -static inline struct frag_queue *fq_find(struct net *net, __be32 id, - u32 user, struct in6_addr *src, - struct in6_addr *dst, int iif, u8 ecn) +static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, + const struct ipv6hdr *hdr, int iif) { + struct frag_v6_compare_key key = { + .id = id, + .saddr = hdr->saddr, + .daddr = hdr->daddr, + .user = user, + .iif = iif, + }; struct inet_frag_queue *q; - struct ip6_create_arg arg; - unsigned int hash; - arg.id = id; - arg.user = user; - arg.src = src; - arg.dst = dst; - arg.iif = iif; - arg.ecn = ecn; - - local_bh_disable(); - hash = nf_hash_frag(id, src, dst); - - q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); - local_bh_enable(); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + q = inet_frag_find(&net->nf_frag.frags, &key); + if (!q) return NULL; - } + return container_of(q, struct frag_queue, q); } @@ -362,7 +335,7 @@ found: return 0; discard_fq: - inet_frag_kill(&fq->q, &nf_frags); + inet_frag_kill(&fq->q); err: return -1; } @@ -383,7 +356,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) int payload_len; u8 ecn; - inet_frag_kill(&fq->q, &nf_frags); + inet_frag_kill(&fq->q); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); @@ -454,6 +427,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; + fp->sk = NULL; } sub_frag_mem_limit(fq->q.net, head->truesize); @@ -472,6 +446,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum); fq->q.fragments = NULL; + fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ @@ -601,9 +576,13 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); + if (clone->len - skb_network_offset(clone) < IPV6_MIN_MTU && + fhdr->frag_off & htons(IP6_MF)) + goto ret_orig; + skb_orphan(skb); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + fq = fq_find(net, fhdr->identification, user, hdr, + skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -614,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); + inet_frag_put(&fq->q); goto ret_orig; } @@ -626,7 +605,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use } spin_unlock_bh(&fq->q.lock); - inet_frag_put(&fq->q, &nf_frags); + inet_frag_put(&fq->q); return ret_skb; ret_orig: @@ -650,18 +629,26 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { + int res; + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->nf_frag.frags); + net->nf_frag.frags.f = &nf_frags; - return nf_ct_frag6_sysctl_register(net); + res = inet_frags_init_net(&net->nf_frag.frags); + if (res < 0) + return res; + res = nf_ct_frag6_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->nf_frag.frags); + return res; } static void nf_ct_net_exit(struct net *net) { nf_ct_frags6_sysctl_unregister(net); - inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); + inet_frags_exit_net(&net->nf_frag.frags); } static struct pernet_operations nf_ct_net_ops = { @@ -673,14 +660,13 @@ int nf_ct_frag6_init(void) { int ret = 0; - nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); - nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frags_cache_name = nf_frags_cache_name; + nf_frags.rhash_params = ip6_rhash_params; ret = inet_frags_init(&nf_frags); if (ret) goto out; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..73e766e7bc37 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -33,7 +33,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - unsigned int frag_mem = ip6_frag_mem(net); seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); @@ -43,7 +42,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); + seq_printf(seq, "FRAG6: inuse %u memory %lu\n", + atomic_read(&net->ipv6.frags.rhashtable.nelems), + frag_mem_limit(&net->ipv6.frags)); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 58f2139ebb5e..ec917f58d105 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -79,94 +79,58 @@ static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); -/* - * callers should be careful not to use the hash value outside the ipfrag_lock - * as doing so could race with ipfrag_hash_rnd being recalculated. - */ -static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); - return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, ip6_frags.rnd); -} - -static unsigned int ip6_hashfn(const struct inet_frag_queue *q) -{ - const struct frag_queue *fq; - - fq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); -} - -bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct frag_queue *fq; - const struct ip6_create_arg *arg = a; - - fq = container_of(q, struct frag_queue, q); - return fq->id == arg->id && - fq->user == arg->user && - ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst) && - (arg->iif == fq->iif || - !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | - IPV6_ADDR_LINKLOCAL))); -} -EXPORT_SYMBOL(ip6_frag_match); - void ip6_frag_init(struct inet_frag_queue *q, const void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); - const struct ip6_create_arg *arg = a; + const struct frag_v6_compare_key *key = a; - fq->id = arg->id; - fq->user = arg->user; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; - fq->ecn = arg->ecn; + q->key.v6 = *key; + fq->ecn = 0; } EXPORT_SYMBOL(ip6_frag_init); -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, - struct inet_frags *frags) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) { struct net_device *dev = NULL; + struct sk_buff *head; + rcu_read_lock(); spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q, frags); + inet_frag_kill(&fq->q); - rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) - goto out_rcu_unlock; + goto out; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - - if (inet_frag_evicting(&fq->q)) - goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) - goto out_rcu_unlock; + head = fq->q.fragments; + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) + goto out; /* But use as source device on which LAST ARRIVED * segment was received. And do not use fq->dev * pointer directly, device might already disappeared. */ - fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + head->dev = dev; + skb_get(head); + spin_unlock(&fq->q.lock); + + icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); + kfree_skb(head); + goto out_rcu_unlock; + out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, frags); +out_rcu_unlock: + rcu_read_unlock(); + inet_frag_put(&fq->q); } EXPORT_SYMBOL(ip6_expire_frag_queue); @@ -178,31 +142,29 @@ static void ip6_frag_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); - ip6_expire_frag_queue(net, fq, &ip6_frags); + ip6_expire_frag_queue(net, fq); } static struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, int iif, u8 ecn) +fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) { + struct frag_v6_compare_key key = { + .id = id, + .saddr = hdr->saddr, + .daddr = hdr->daddr, + .user = IP6_DEFRAG_LOCAL_DELIVER, + .iif = iif, + }; struct inet_frag_queue *q; - struct ip6_create_arg arg; - unsigned int hash; - arg.id = id; - arg.user = IP6_DEFRAG_LOCAL_DELIVER; - arg.src = src; - arg.dst = dst; - arg.iif = iif; - arg.ecn = ecn; + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; - hash = inet6_hash_frag(id, src, dst); - - q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + q = inet_frag_find(&net->ipv6.frags, &key); + if (!q) return NULL; - } + return container_of(q, struct frag_queue, q); } @@ -359,7 +321,7 @@ found: return -1; discard_fq: - inet_frag_kill(&fq->q, &ip6_frags); + inet_frag_kill(&fq->q); err: IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -386,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int sum_truesize; u8 ecn; - inet_frag_kill(&fq->q, &ip6_frags); + inet_frag_kill(&fq->q); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -503,6 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; + fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; return 1; @@ -524,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -552,17 +516,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && + fhdr->frag_off & htons(IP6_MF)) + goto fail_hdr; + + iif = skb->dev ? skb->dev->ifindex : 0; + fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { int ret; spin_lock(&fq->q.lock); + fq->iif = iif; ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &ip6_frags); + inet_frag_put(&fq->q); return ret; } @@ -583,24 +552,22 @@ static const struct inet6_protocol frag_protocol = { }; #ifdef CONFIG_SYSCTL -static int zero; static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, .extra2 = &init_net.ipv6.frags.high_thresh }, { @@ -708,19 +675,27 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { + int res; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ipv6.frags.f = &ip6_frags; - inet_frags_init_net(&net->ipv6.frags); + res = inet_frags_init_net(&net->ipv6.frags); + if (res < 0) + return res; - return ip6_frags_ns_sysctl_register(net); + res = ip6_frags_ns_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->ipv6.frags); + return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); + inet_frags_exit_net(&net->ipv6.frags); } static struct pernet_operations ip6_frags_ops = { @@ -728,14 +703,55 @@ static struct pernet_operations ip6_frags_ops = { .exit = ipv6_frags_exit_net, }; +static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +} + +static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key.v6, + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +} + +static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_v6_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +const struct rhashtable_params ip6_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = ip6_key_hashfn, + .obj_hashfn = ip6_obj_hashfn, + .obj_cmpfn = ip6_obj_cmpfn, + .automatic_shrinking = true, +}; +EXPORT_SYMBOL(ip6_rhash_params); + int __init ipv6_frag_init(void) { int ret; - ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + ip6_frags.constructor = ip6_frag_init; + ip6_frags.destructor = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.frag_expire = ip6_frag_expire; + ip6_frags.frags_cache_name = ip6_frag_cache_name; + ip6_frags.rhash_params = ip6_rhash_params; + ret = inet_frags_init(&ip6_frags); if (ret) goto out; + ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + if (ret) + goto err_protocol; + ret = ip6_frags_sysctl_register(); if (ret) goto err_sysctl; @@ -744,17 +760,6 @@ int __init ipv6_frag_init(void) if (ret) goto err_pernet; - ip6_frags.hashfn = ip6_hashfn; - ip6_frags.constructor = ip6_frag_init; - ip6_frags.destructor = NULL; - ip6_frags.skb_free = NULL; - ip6_frags.qsize = sizeof(struct frag_queue); - ip6_frags.match = ip6_frag_match; - ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.frags_cache_name = ip6_frag_cache_name; - ret = inet_frags_init(&ip6_frags); - if (ret) - goto err_pernet; out: return ret; @@ -762,6 +767,8 @@ err_pernet: ip6_frags_sysctl_unregister(); err_sysctl: inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); +err_protocol: + inet_frags_fini(&ip6_frags); goto out; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1f79910fef55..9de10cecc919 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3087,7 +3087,7 @@ static int rt6_fill_node(struct net *net, table = rt->rt6i_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; if (rt->rt6i_flags & RTF_REJECT) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 11282ffca567..77736190dc15 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -577,7 +577,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; - if (!ipip6_err_gen_icmpv6_unreach(skb)) + if (__in6_dev_get(skb->dev) && !ipip6_err_gen_icmpv6_unreach(skb)) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) @@ -693,6 +693,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -772,8 +776,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; - d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> - tunnel->ip6rd.relay_prefixlen; + d = tunnel->ip6rd.relay_prefixlen < 32 ? + (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 625e05dbf8cc..75caab7dfa2a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1045,11 +1045,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3b0e22d20e83..0f2a36ead339 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -585,7 +585,7 @@ out: sock_put(sk); } -static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0682c031ccdc..3c1dbc9f74cf 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); void udp_v6_clear_sk(struct sock *sk, int size); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 9cf097e206e9..d1eaeeaa34d2 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,7 +45,7 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5743044cd660..56b72cada346 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -144,6 +144,9 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; + + if (spi == XFRM6_TUNNEL_SPI_MAX) + break; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { index = __xfrm6_tunnel_spi_check(net, spi); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 591d18785285..429dbb064240 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -705,11 +704,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: recv data ns=%u, session nr=%u\n", session->name, ns, session->nr); } + ptr += 4; } - /* Advance past L2-specific header, if present */ - ptr += session->l2specific_len; - if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers. If we're the LNS, * check if we sre sending sequence numbers and if not, @@ -860,7 +857,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); @@ -933,6 +930,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); return 0; @@ -1031,21 +1032,20 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } - if (session->l2specific_len) { - if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = 0; - if (session->send_seq) { - l2h = 0x40000000 | session->ns; - session->ns++; - session->ns &= 0xffffff; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: updated ns to %u\n", - session->name, session->ns); - } + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; - *((__be32 *) bufp) = htonl(l2h); + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: updated ns to %u\n", + session->name, session->ns); } - bufp += session->l2specific_len; + + *((__be32 *)bufp) = htonl(l2h); + bufp += 4; } if (session->offset) bufp += session->offset; @@ -1724,7 +1724,7 @@ int l2tp_session_delete(struct l2tp_session *session) EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or - * l2specific_len parameters are set. + * l2specific_type parameters are set. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version) { @@ -1733,7 +1733,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; + session->hdr_len = 4 + session->cookie_len + session->offset; + session->hdr_len += l2tp_get_l2specific_len(session); if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9cf546846edb..fad47e9d74bc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -313,6 +313,37 @@ do { \ #define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s) #endif +static inline int l2tp_get_l2specific_len(struct l2tp_session *session) +{ + switch (session->l2specific_type) { + case L2TP_L2SPECTYPE_DEFAULT: + return 4; + case L2TP_L2SPECTYPE_NONE: + default: + return 0; + } +} + +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index af74e3ba0f92..7efb3cadc152 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -163,6 +163,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); return 0; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a30f6fb6caa9..c125478981ac 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -174,6 +174,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); return 0; @@ -664,9 +667,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*lsa); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -696,6 +696,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); + *addr_len = sizeof(*lsa); } if (np->rxopt.all) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 67348d8ac35d..7349bf26ae7b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1228,6 +1228,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; + if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 18b0d65baff0..6019988bfb84 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1157,6 +1157,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a5e11280f405..ed4fef32b394 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1886,7 +1886,8 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params[ac].cw_min > params[ac].cw_max) { + if (params[ac].cw_min == 0 || + params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", params[ac].cw_min, params[ac].cw_max, aci); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64f76f88f819..833ad779659c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -149,6 +149,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); + /* vendor presence bitmap */ + if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) + len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -185,8 +188,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { struct ieee80211_vendor_radiotap *rtap = (void *)skb->data; - /* vendor presence bitmap */ - len += 4; /* alignment for fixed 6-byte vendor data header */ len = ALIGN(len, 2); /* vendor data header */ @@ -2339,7 +2340,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) skb_set_queue_mapping(skb, q); if (!--mesh_hdr->ttl) { - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + if (!is_multicast_ether_addr(hdr->addr1)) + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, + dropped_frames_ttl); goto out; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c1c27a516e45..41f3eb565ef3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1599,9 +1599,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1609,8 +1616,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 3167ec76903a..56c62b65923f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2217,6 +2217,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->tcp_fin_timeout, u->udp_timeout); +#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 278f3b9356ef..7cc1d9c22a9f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -410,6 +410,8 @@ static void tcp_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; @@ -470,6 +472,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index fefbf5f0b28d..088e8da06b00 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -243,6 +243,9 @@ nfacct_filter_alloc(const struct nlattr * const attr) if (err < 0) return ERR_PTR(err); + if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE]) + return ERR_PTR(-EINVAL); + filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); if (!filter) return ERR_PTR(-ENOMEM); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 740cce4685ac..85b4f7902b49 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -895,7 +895,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out_put; default: ret = -ENOTSUPP; - break; + goto out_put; } } else if (!inst) { ret = -ENODEV; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1caaccbc306c..7e4063621960 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -96,8 +96,7 @@ match_outdev: static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - - br_netfilter_enable(); + static bool brnf_probed __read_mostly; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -113,6 +112,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } + + if (!brnf_probed) { + brnf_probed = true; + request_module("br_netfilter"); + } + return 0; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d..046ae1caecea 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr) sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { - bh_lock_sock(s); + sock_hold(s); goto found; } s = NULL; @@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, if (nr->your_index == index && nr->your_id == id && !ax25cmp(&nr->dest_addr, dest)) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void) if (i != 0 && j != 0) { if ((sk=nr_find_socket(i, j)) == NULL) break; - bh_unlock_sock(sk); + sock_put(sk); } id++; @@ -918,6 +918,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) } if (sk != NULL) { + bh_lock_sock(sk); skb_reset_transport_header(skb); if (frametype == NR_CONNACK && skb->len == 22) @@ -927,6 +928,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) ret = nr_process_rx_frame(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; } @@ -958,10 +960,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) (make = nr_make_new(sk)) == NULL) { nr_transmit_refusal(skb, 0); if (sk) - bh_unlock_sock(sk); + sock_put(sk); return 0; } + bh_lock_sock(sk); + window = skb->data[20]; skb->sk = make; @@ -1014,6 +1018,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); bh_unlock_sock(sk); + sock_put(sk); nr_insert_socket(make); diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 94d05806a9a2..f0ecaec1ff3d 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -53,21 +53,21 @@ void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t1timer, jiffies + nr->t1); + sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t2timer, jiffies + nr->t2); + sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t4timer, jiffies + nr->t4); + sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) @@ -75,37 +75,37 @@ void nr_start_idletimer(struct sock *sk) struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) - mod_timer(&nr->idletimer, jiffies + nr->idle); + sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { - mod_timer(&sk->sk_timer, jiffies + 5 * HZ); + sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t1timer); + sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t2timer); + sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t4timer); + sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { - del_timer(&nr_sk(sk)->idletimer); + sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 04f060488686..96277ac37dac 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) sock->service_name, sock->service_name_len, &service_name_tlv_length); + if (!service_name_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += service_name_tlv_length; } @@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -486,9 +498,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 98876274a1ee..c1334b826dd5 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { - u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto_length; - u8 *wks_tlv, wks_length; - u8 *miux_tlv, miux_length; + u8 *gb_cur, version, version_length; + u8 lto_length, wks_length, miux_length; + u8 *version_tlv = NULL, *lto_tlv = NULL, + *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 1, &version_length); + if (!version_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += version_length; lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, <o_length); + if (!lto_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); + if (!wks_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); + if (!miux_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += miux_length; gb_len += ARRAY_SIZE(llcp_magic); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 624c4719e404..9b676f8fc16f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -409,7 +409,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } @@ -1736,14 +1736,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 07668f152a3a..7d93228ba1e1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2511,8 +2511,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_addr; + addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); + if (addr && dev && saddr->sll_halen < dev->addr_len) + goto out_put; } err = -ENXIO; @@ -2678,8 +2680,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_addr; + addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); + if (addr && dev && saddr->sll_halen < dev->addr_len) + goto out_unlock; } err = -ENXIO; @@ -3151,7 +3155,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); @@ -4126,7 +4130,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; @@ -4213,7 +4217,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 850a86cde0b3..f6aa532bcbf6 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -131,7 +131,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code, ph->utid = 0; ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -152,7 +152,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code, ph->utid = id; /* whatever */ ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -207,7 +207,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct pnpipehdr *ph; struct sockaddr_pn dst; u8 data[4] = { - oph->data[0], /* PEP type */ + oph->pep_type, /* PEP type */ code, /* error code, at an unusual offset */ PAD, PAD, }; @@ -220,7 +220,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; - ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data0 = oph->data[0]; /* CTRL id */ pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -271,17 +271,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); - if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + if (hdr->pep_type != PN_PEP_TYPE_COMMON) { net_dbg_ratelimited("Phonet unknown PEP type: %u\n", - (unsigned int)hdr->data[0]); + (unsigned int)hdr->pep_type); return -EOPNOTSUPP; } - switch (hdr->data[1]) { + switch (hdr->data[0]) { case PN_PEP_IND_FLOW_CONTROL: switch (pn->tx_fc) { case PN_LEGACY_FLOW_CONTROL: - switch (hdr->data[4]) { + switch (hdr->data[3]) { case PEP_IND_BUSY: atomic_set(&pn->tx_credits, 0); break; @@ -291,7 +291,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) } break; case PN_ONE_CREDIT_FLOW_CONTROL: - if (hdr->data[4] == PEP_IND_READY) + if (hdr->data[3] == PEP_IND_READY) atomic_set(&pn->tx_credits, wake = 1); break; } @@ -300,12 +300,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - atomic_add(wake = hdr->data[4], &pn->tx_credits); + atomic_add(wake = hdr->data[3], &pn->tx_credits); break; default: net_dbg_ratelimited("Phonet unknown PEP indication: %u\n", - (unsigned int)hdr->data[1]); + (unsigned int)hdr->data[0]); return -EOPNOTSUPP; } if (wake) @@ -317,7 +317,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); - u8 n_sb = hdr->data[0]; + u8 n_sb = hdr->data0; pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; __skb_pull(skb, sizeof(*hdr)); @@ -505,7 +505,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -ECONNREFUSED; /* Parse sub-blocks */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[6], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -738,7 +738,7 @@ static int pipe_do_remove(struct sock *sk) ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = PAD; + ph->data0 = PAD; return pn_skb_send(sk, skb, NULL); } @@ -815,7 +815,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) peer_type = hdr->other_pep_type << 8; /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[1], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -1106,7 +1106,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) ph->utid = 0; if (pn->aligned) { ph->message_id = PNS_PIPE_ALIGNED_DATA; - ph->data[0] = 0; /* padding */ + ph->data0 = 0; /* padding */ } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 18e50a8fc05f..554d4b461983 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -346,7 +346,7 @@ static void rds_tcp_kill_sock(struct net *net) list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->conn->c_net); - if (net != c_net || !tc->t_sock) + if (net != c_net) continue; list_move_tail(&tc->t_tcp_node, &tmp_list); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 0fc76d845103..9f704a7f2a28 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -848,6 +848,7 @@ void rose_link_device_down(struct net_device *dev) /* * Route a frame to an appropriate AX.25 connection. + * A NULL ax25_cb indicates an internally generated frame. */ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { @@ -865,6 +866,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (skb->len < ROSE_MIN_LEN) return res; + + if (!ax25) + return rose_loopback_queue(skb, NULL); + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6d340cd6e2a7..b379c330a338 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1823,7 +1823,6 @@ done: int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const struct tcf_proto *old_tp = tp; int limit = 0; @@ -1831,6 +1830,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -1857,7 +1857,6 @@ reset: } tp = old_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 743ff23885da..7acf1f2b8dfc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -432,6 +432,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) int count = 1; int rc = NET_XMIT_SUCCESS; + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; + /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 5ca8309ea7b1..9fa0b0dc3868 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -97,10 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; @@ -411,7 +410,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index dc030efa4447..247d1888c386 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -151,7 +151,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -599,6 +598,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } @@ -775,10 +775,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); diff --git a/net/socket.c b/net/socket.c index 5fc5362fa9a3..6412baf4da27 100644 --- a/net/socket.c +++ b/net/socket.c @@ -470,27 +470,15 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) static ssize_t sockfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - const char *proto_name; - size_t proto_size; - int error; - - error = -ENODATA; - if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { - proto_name = dentry->d_name.name; - proto_size = strlen(proto_name); - + if (!strcmp(name, XATTR_NAME_SOCKPROTONAME)) { if (value) { - error = -ERANGE; - if (proto_size + 1 > size) - goto out; - - strncpy(value, proto_name, proto_size + 1); + if (dentry->d_name.len + 1 > size) + return -ERANGE; + memcpy(value, dentry->d_name.name, dentry->d_name.len + 1); } - error = proto_size + 1; + return dentry->d_name.len + 1; } - -out: - return error; + return -EOPNOTSUPP; } static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, @@ -590,6 +578,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) if (inode) inode_lock(inode); sock->ops->release(sock); + sock->sk = NULL; if (inode) inode_unlock(inode); sock->ops = NULL; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 036bbf2b44c1..b5291ea54a3d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1105,7 +1105,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct kvec *resv = &rqstp->rq_res.head[0]; struct rsi *rsip, rsikey; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); memset(&rsikey, 0, sizeof(rsikey)); ret = gss_read_verf(gc, argv, authp, @@ -1216,7 +1216,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, uint64_t handle; int status; int ret; - struct net *net = rqstp->rq_xprt->xpt_net; + struct net *net = SVC_NET(rqstp); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); memset(&ud, 0, sizeof(ud)); @@ -1406,7 +1406,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", argv->iov_len); @@ -1694,7 +1694,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); + struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 63fb5ee212cf..af17b00145e1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,11 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static void cache_fresh_locked(struct cache_head *head, time_t expiry, + struct cache_detail *detail); +static void cache_fresh_unlocked(struct cache_head *head, + struct cache_detail *detail); + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -95,6 +100,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init(&tmp->cache_list); detail->entries --; + cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } @@ -110,8 +116,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, cache_get(new); write_unlock(&detail->hash_lock); - if (freeme) + if (freeme) { + cache_fresh_unlocked(freeme, detail); cache_put(freeme, detail); + } return new; } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cf5770d8f49a..c89626b2afff 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -772,6 +772,12 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); + if (!map->r_addr) { + status = -ENOMEM; + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __func__); + goto bailout_free_args; + } map->r_owner = ""; break; case RPCBVERS_2: @@ -794,6 +800,8 @@ void rpcb_getport_async(struct rpc_task *task) rpc_put_task(child); return; +bailout_free_args: + kfree(map); bailout_release_client: rpc_release_client(rpcb_clnt); bailout_nofree: diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c5b0cb4f4056..41f6e964fe91 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1062,6 +1062,8 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif +extern void svc_tcp_prep_reply_hdr(struct svc_rqst *); + /* * Common routine for processing the RPC request. */ @@ -1091,7 +1093,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) clear_bit(RQ_DROPME, &rqstp->rq_flags); /* Setup reply header */ - rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); + if (rqstp->rq_prot == IPPROTO_TCP) + svc_tcp_prep_reply_hdr(rqstp); svc_putu32(resv, rqstp->rq_xid); @@ -1138,7 +1141,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) case SVC_DENIED: goto err_bad_auth; case SVC_CLOSE: - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + if (rqstp->rq_xprt && + test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) svc_close_xprt(rqstp->rq_xprt); case SVC_DROP: goto dropit; @@ -1360,10 +1364,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ - rqstp->rq_xprt = serv->sv_bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; + rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 71f15da72f02..2b8e80c721db 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -454,10 +454,11 @@ out: */ void svc_reserve(struct svc_rqst *rqstp, int space) { + struct svc_xprt *xprt = rqstp->rq_xprt; + space += rqstp->rq_res.head[0].iov_len; - if (space < rqstp->rq_reserved) { - struct svc_xprt *xprt = rqstp->rq_xprt; + if (xprt && space < rqstp->rq_reserved) { atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1413cdcc131c..0a9fe033132c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -614,7 +614,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->tstamp; + sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ len = skb->len - sizeof(struct udphdr); @@ -1240,7 +1240,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) /* * Setup response header. TCP has a 4B record length field. */ -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) +void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) { struct kvec *resv = &rqstp->rq_res.head[0]; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2e98f4a243e5..112c191b8336 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -758,8 +758,15 @@ void xprt_connect(struct rpc_task *task) return; if (xprt_test_and_set_connecting(xprt)) return; - xprt->stat.connect_start = jiffies; - xprt->ops->connect(xprt, task); + /* Race breaker */ + if (!xprt_connected(xprt)) { + xprt->stat.connect_start = jiffies; + xprt->ops->connect(xprt, task); + } else { + xprt_clear_connecting(xprt); + task->tk_status = 0; + rpc_wake_up_queued_task(&xprt->pending, task); + } } xprt_release_write(xprt, task); } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f86c6555a539..e9653c42cdd1 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb) return limit; } +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) { struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); @@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str) return buf; } +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) @@ -364,6 +374,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, struct nlattr *prop; struct nlattr *bearer; struct tipc_bearer_config *b; + int len; b = (struct tipc_bearer_config *)TLV_DATA(msg->req); @@ -371,6 +382,10 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) return -EMSGSIZE; @@ -396,6 +411,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *bearer; + int len; name = (char *)TLV_DATA(msg->req); @@ -403,6 +419,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) return -EMSGSIZE; @@ -462,6 +482,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + int len; nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); @@ -472,6 +493,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, NULL); name = (char *)TLV_DATA(msg->req); + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; @@ -605,6 +631,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -612,6 +639,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (!media) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; @@ -632,6 +663,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -639,6 +671,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; @@ -687,9 +723,14 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, struct tipc_link_config *lc; struct tipc_bearer *bearer; struct tipc_media *media; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + media = tipc_media_find(lc->name); if (media) { cmd->doit = &tipc_nl_media_set; @@ -711,6 +752,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *link; + int len; name = (char *)TLV_DATA(msg->req); @@ -718,6 +760,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) return -EMSGSIZE; @@ -739,6 +785,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; depth = ntohl(ntq->depth); @@ -1117,7 +1165,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (len && !TLV_OK(msg.req, len)) { + if (!len || !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f9ff73a8d815..500c9e614a06 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -337,7 +337,7 @@ int tipc_topsrv_start(struct net *net) topsrv->tipc_conn_new = tipc_subscrb_connect_cb; topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; - strncpy(topsrv->name, name, strlen(name) + 1); + strscpy(topsrv->name, name, sizeof(topsrv->name)); tn->topsrv = topsrv; atomic_set(&tn->subscription_count, 0); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c6b1eec94911..b1a72615fdc3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -890,7 +890,7 @@ retry: addr->hash ^= sk->sk_type; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(&unix_socket_table[addr->hash], sk); spin_unlock(&unix_table_lock); err = 0; @@ -1060,7 +1060,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = 0; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(list, sk); out_unlock: @@ -1331,15 +1331,29 @@ restart: RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); - /* copy address information from listening to new sock*/ - if (otheru->addr) { - atomic_inc(&otheru->addr->refcnt); - newu->addr = otheru->addr; - } + /* copy address information from listening to new sock + * + * The contents of *(otheru->addr) and otheru->path + * are seen fully set up here, since we have found + * otheru in hash under unix_table_lock. Insertion + * into the hash chain we'd found it in had been done + * in an earlier critical area protected by unix_table_lock, + * the same one where we'd set *(otheru->addr) contents, + * as well as otheru->path and otheru->addr itself. + * + * Using smp_store_release() here to set newu->addr + * is enough to make those stores, as well as stores + * to newu->path visible to anyone who gets newu->addr + * by smp_load_acquire(). IOW, the same warranties + * as for unix_sock instances bound in unix_bind() or + * in unix_autobind(). + */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } + atomic_inc(&otheru->addr->refcnt); + smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); @@ -1452,7 +1466,7 @@ out: static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sock *sk = sock->sk; - struct unix_sock *u; + struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; @@ -1467,19 +1481,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ sock_hold(sk); } - u = unix_sk(sk); - unix_state_lock(sk); - if (!u->addr) { + addr = smp_load_acquire(&unix_sk(sk)->addr); + if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; *uaddr_len = sizeof(short); } else { - struct unix_address *addr = u->addr; - *uaddr_len = addr->len; memcpy(sunaddr, addr->name, *uaddr_len); } - unix_state_unlock(sk); sock_put(sk); out: return err; @@ -2093,11 +2103,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { - struct unix_sock *u = unix_sk(sk); + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); - if (u->addr) { - msg->msg_namelen = u->addr->len; - memcpy(msg->msg_name, u->addr->name, u->addr->len); + if (addr) { + msg->msg_namelen = addr->len; + memcpy(msg->msg_name, addr->name, addr->len); } } @@ -2820,7 +2830,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { + if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); diff --git a/net/unix/diag.c b/net/unix/diag.c index 384c84e83462..3183d9b8ab33 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -10,7 +10,8 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - struct unix_address *addr = unix_sk(sk)->addr; + /* might or might not have unix_table_lock */ + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 14810abedc2e..8831e7c42167 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. + +config VIRTIO_VSOCKETS + tristate "virtio transport for Virtual Sockets" + depends on VSOCKETS && VIRTIO + select VIRTIO_VSOCKETS_COMMON + help + This module implements a virtio transport for Virtual Sockets. + + Enable this transport if your Virtual Machine host supports Virtual + Sockets over virtio. + + To compile this driver as a module, choose M here: the module will be + called vmw_vsock_virtio_transport. If unsure, say N. + +config VIRTIO_VSOCKETS_COMMON + tristate + help + This option is selected by any driver which needs to access + the virtio_vsock. The module will be called + vmw_vsock_virtio_transport_common. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 2ce52d70f224..bc27c70e0e59 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,7 +1,13 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmci_transport_notify_qstate.o + +vmw_vsock_virtio_transport-y += virtio_transport.o + +vmw_vsock_virtio_transport_common-y += virtio_transport_common.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7f1d166ce612..7566395e526d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -61,6 +61,14 @@ * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * + * - Lock ordering for pending or accept queue sockets is: + * + * lock_sock(listener); + * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); + * + * Using explicit nested locking keeps lockdep happy since normally only one + * lock of a given class may be taken at a time. + * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our @@ -336,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk) return ret; } +void vsock_remove_sock(struct vsock_sock *vsk) +{ + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); +} +EXPORT_SYMBOL_GPL(vsock_remove_sock); + void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) { int i; @@ -443,10 +461,12 @@ static void vsock_pending_work(struct work_struct *work) cleanup = true; lock_sock(listener); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); + + listener->sk_ack_backlog--; } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. We @@ -457,8 +477,6 @@ static void vsock_pending_work(struct work_struct *work) goto out; } - listener->sk_ack_backlog--; - /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. @@ -655,12 +673,6 @@ static void __vsock_release(struct sock *sk) vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ - if (vsock_in_bound_table(vsk)) - vsock_remove_bound(vsk); - - if (vsock_in_connected_table(vsk)) - vsock_remove_connected(vsk); - transport->release(vsk); lock_sock(sk); @@ -1092,10 +1104,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1106,8 +1127,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1212,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } @@ -1293,7 +1319,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) if (connected) { listener->sk_ack_backlog--; - lock_sock(connected); + lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should @@ -1983,7 +2009,16 @@ void vsock_core_exit(void) } EXPORT_SYMBOL_GPL(vsock_core_exit); +const struct vsock_transport *vsock_core_get_transport(void) +{ + /* vsock_register_mutex not taken since only the transport uses this + * function and only while registered. + */ + return transport; +} +EXPORT_SYMBOL_GPL(vsock_core_get_transport); + MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.1.0-k"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c new file mode 100644 index 000000000000..936d7eee62d0 --- /dev/null +++ b/net/vmw_vsock/virtio_transport.c @@ -0,0 +1,620 @@ +/* + * virtio transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * Some of the code is take from Gerd Hoffmann 's + * early virtio-vsock proof-of-concept bits. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *virtio_vsock_workqueue; +static struct virtio_vsock *the_virtio_vsock; +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ + +struct virtio_vsock { + struct virtio_device *vdev; + struct virtqueue *vqs[VSOCK_VQ_MAX]; + + /* Virtqueue processing is deferred to a workqueue */ + struct work_struct tx_work; + struct work_struct rx_work; + struct work_struct event_work; + + /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] + * must be accessed with tx_lock held. + */ + struct mutex tx_lock; + + struct work_struct send_pkt_work; + spinlock_t send_pkt_list_lock; + struct list_head send_pkt_list; + + atomic_t queued_replies; + + /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] + * must be accessed with rx_lock held. + */ + struct mutex rx_lock; + int rx_buf_nr; + int rx_buf_max_nr; + + /* The following fields are protected by event_lock. + * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held. + */ + struct mutex event_lock; + struct virtio_vsock_event event_list[8]; + + u32 guest_cid; +}; + +static struct virtio_vsock *virtio_vsock_get(void) +{ + return the_virtio_vsock; +} + +static u32 virtio_transport_get_local_cid(void) +{ + struct virtio_vsock *vsock = virtio_vsock_get(); + + return vsock->guest_cid; +} + +static void +virtio_transport_send_pkt_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, send_pkt_work); + struct virtqueue *vq; + bool added = false; + bool restart_rx = false; + + mutex_lock(&vsock->tx_lock); + + vq = vsock->vqs[VSOCK_VQ_TX]; + + for (;;) { + struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; + int ret, in_sg = 0, out_sg = 0; + bool reply; + + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + reply = pkt->reply; + + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[out_sg++] = &hdr; + if (pkt->buf) { + sg_init_one(&buf, pkt->buf, pkt->len); + sgs[out_sg++] = &buf; + } + + ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); + /* Usually this means that there is no more space available in + * the vq + */ + if (ret < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + if (reply) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we now have resources to resume rx processing? */ + if (val + 1 == virtqueue_get_vring_size(rx_vq)) + restart_rx = true; + } + + added = true; + } + + if (added) + virtqueue_kick(vq); + + mutex_unlock(&vsock->tx_lock); + + if (restart_rx) + queue_work(virtio_vsock_workqueue, &vsock->rx_work); +} + +static int +virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock *vsock; + int len = pkt->len; + + vsock = virtio_vsock_get(); + if (!vsock) { + virtio_transport_free_pkt(pkt); + return -ENODEV; + } + + if (pkt->reply) + atomic_inc(&vsock->queued_replies); + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add_tail(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + return len; +} + +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) +{ + int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + int ret; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + do { + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + break; + + pkt->buf = kmalloc(buf_len, GFP_KERNEL); + if (!pkt->buf) { + virtio_transport_free_pkt(pkt); + break; + } + + pkt->len = buf_len; + + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[0] = &hdr; + + sg_init_one(&buf, pkt->buf, buf_len); + sgs[1] = &buf; + ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); + if (ret) { + virtio_transport_free_pkt(pkt); + break; + } + vsock->rx_buf_nr++; + } while (vq->num_free); + if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) + vsock->rx_buf_max_nr = vsock->rx_buf_nr; + virtqueue_kick(vq); +} + +static void virtio_transport_tx_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, tx_work); + struct virtqueue *vq; + bool added = false; + + vq = vsock->vqs[VSOCK_VQ_TX]; + mutex_lock(&vsock->tx_lock); + do { + struct virtio_vsock_pkt *pkt; + unsigned int len; + + virtqueue_disable_cb(vq); + while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { + virtio_transport_free_pkt(pkt); + added = true; + } + } while (!virtqueue_enable_cb(vq)); + mutex_unlock(&vsock->tx_lock); + + if (added) + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); +} + +/* Is there space left for replies to rx packets? */ +static bool virtio_transport_more_replies(struct virtio_vsock *vsock) +{ + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; + int val; + + smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ + val = atomic_read(&vsock->queued_replies); + + return val < virtqueue_get_vring_size(vq); +} + +static void virtio_transport_rx_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, rx_work); + struct virtqueue *vq; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + mutex_lock(&vsock->rx_lock); + + do { + virtqueue_disable_cb(vq); + for (;;) { + struct virtio_vsock_pkt *pkt; + unsigned int len; + + if (!virtio_transport_more_replies(vsock)) { + /* Stop rx until the device processes already + * pending replies. Leave rx virtqueue + * callbacks disabled. + */ + goto out; + } + + pkt = virtqueue_get_buf(vq, &len); + if (!pkt) { + break; + } + + vsock->rx_buf_nr--; + + /* Drop short/long packets */ + if (unlikely(len < sizeof(pkt->hdr) || + len > sizeof(pkt->hdr) + pkt->len)) { + virtio_transport_free_pkt(pkt); + continue; + } + + pkt->len = len - sizeof(pkt->hdr); + virtio_transport_recv_pkt(pkt); + } + } while (!virtqueue_enable_cb(vq)); + +out: + if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); +} + +/* event_lock must be held */ +static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, + struct virtio_vsock_event *event) +{ + struct scatterlist sg; + struct virtqueue *vq; + + vq = vsock->vqs[VSOCK_VQ_EVENT]; + + sg_init_one(&sg, event, sizeof(*event)); + + return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); +} + +/* event_lock must be held */ +static void virtio_vsock_event_fill(struct virtio_vsock *vsock) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { + struct virtio_vsock_event *event = &vsock->event_list[i]; + + virtio_vsock_event_fill_one(vsock, event); + } + + virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); +} + +static void virtio_vsock_reset_sock(struct sock *sk) +{ + lock_sock(sk); + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + release_sock(sk); +} + +static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) +{ + struct virtio_device *vdev = vsock->vdev; + u64 guest_cid; + + vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), + &guest_cid, sizeof(guest_cid)); + vsock->guest_cid = le64_to_cpu(guest_cid); +} + +/* event_lock must be held */ +static void virtio_vsock_event_handle(struct virtio_vsock *vsock, + struct virtio_vsock_event *event) +{ + switch (le32_to_cpu(event->id)) { + case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: + virtio_vsock_update_guest_cid(vsock); + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + break; + } +} + +static void virtio_transport_event_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, event_work); + struct virtqueue *vq; + + vq = vsock->vqs[VSOCK_VQ_EVENT]; + + mutex_lock(&vsock->event_lock); + + do { + struct virtio_vsock_event *event; + unsigned int len; + + virtqueue_disable_cb(vq); + while ((event = virtqueue_get_buf(vq, &len)) != NULL) { + if (len == sizeof(*event)) + virtio_vsock_event_handle(vsock, event); + + virtio_vsock_event_fill_one(vsock, event); + } + } while (!virtqueue_enable_cb(vq)); + + virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); + + mutex_unlock(&vsock->event_lock); +} + +static void virtio_vsock_event_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->event_work); +} + +static void virtio_vsock_tx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->tx_work); +} + +static void virtio_vsock_rx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->rx_work); +} + +static struct virtio_transport virtio_transport = { + .transport = { + .get_local_cid = virtio_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + + .dgram_bind = virtio_transport_dgram_bind, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = virtio_transport_send_pkt, +}; + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + vq_callback_t *callbacks[] = { + virtio_vsock_rx_done, + virtio_vsock_tx_done, + virtio_vsock_event_done, + }; + static const char * const names[] = { + "rx", + "tx", + "event", + }; + struct virtio_vsock *vsock = NULL; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (the_virtio_vsock) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, + vsock->vqs, callbacks, names); + if (ret < 0) + goto out; + + virtio_vsock_update_guest_cid(vsock); + + ret = vsock_core_init(&virtio_transport.transport); + if (ret < 0) + goto out_vqs; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + atomic_set(&vsock->queued_replies, 0); + + vdev->priv = vsock; + the_virtio_vsock = vsock; + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + mutex_init(&vsock->event_lock); + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); + INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); + INIT_WORK(&vsock->event_work, virtio_transport_event_work); + INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + + mutex_lock(&vsock->rx_lock); + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); + + mutex_lock(&vsock->event_lock); + virtio_vsock_event_fill(vsock); + mutex_unlock(&vsock->event_lock); + + mutex_unlock(&the_virtio_vsock_mutex); + return 0; + +out_vqs: + vsock->vdev->config->del_vqs(vsock->vdev); +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + struct virtio_vsock_pkt *pkt; + + flush_work(&vsock->rx_work); + flush_work(&vsock->tx_work); + flush_work(&vsock->event_work); + flush_work(&vsock->send_pkt_work); + + vdev->config->reset(vdev); + + mutex_lock(&vsock->rx_lock); + while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) + virtio_transport_free_pkt(pkt); + mutex_unlock(&vsock->rx_lock); + + mutex_lock(&vsock->tx_lock); + while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) + virtio_transport_free_pkt(pkt); + mutex_unlock(&vsock->tx_lock); + + spin_lock_bh(&vsock->send_pkt_list_lock); + while (!list_empty(&vsock->send_pkt_list)) { + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + mutex_lock(&the_virtio_vsock_mutex); + the_virtio_vsock = NULL; + vsock_core_exit(); + mutex_unlock(&the_virtio_vsock_mutex); + + vdev->config->del_vqs(vdev); + + kfree(vsock); +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { +}; + +static struct virtio_driver virtio_vsock_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_vsock_probe, + .remove = virtio_vsock_remove, +}; + +static int __init virtio_vsock_init(void) +{ + int ret; + + virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); + if (!virtio_vsock_workqueue) + return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); + if (ret) + destroy_workqueue(virtio_vsock_workqueue); + return ret; +} + +static void __exit virtio_vsock_exit(void) +{ + unregister_virtio_driver(&virtio_vsock_driver); + destroy_workqueue(virtio_vsock_workqueue); +} + +module_init(virtio_vsock_init); +module_exit(virtio_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("virtio transport for vsock"); +MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c new file mode 100644 index 000000000000..9c07c76c504d --- /dev/null +++ b/net/vmw_vsock/virtio_transport_common.c @@ -0,0 +1,999 @@ +/* + * common code for virtio vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define CREATE_TRACE_POINTS +#include + +/* How long to wait for graceful shutdown of a connection */ +#define VSOCK_CLOSE_TIMEOUT (8 * HZ) + +static const struct virtio_transport *virtio_transport_get_ops(void) +{ + const struct vsock_transport *t = vsock_core_get_transport(); + + return container_of(t, struct virtio_transport, transport); +} + +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_vsock_pkt *pkt; + int err; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + pkt->hdr.type = cpu_to_le16(info->type); + pkt->hdr.op = cpu_to_le16(info->op); + pkt->hdr.src_cid = cpu_to_le64(src_cid); + pkt->hdr.dst_cid = cpu_to_le64(dst_cid); + pkt->hdr.src_port = cpu_to_le32(src_port); + pkt->hdr.dst_port = cpu_to_le32(dst_port); + pkt->hdr.flags = cpu_to_le32(info->flags); + pkt->len = len; + pkt->hdr.len = cpu_to_le32(len); + pkt->reply = info->reply; + pkt->vsk = info->vsk; + + if (info->msg && len > 0) { + pkt->buf = kmalloc(len, GFP_KERNEL); + if (!pkt->buf) + goto out_pkt; + err = memcpy_from_msg(pkt->buf, info->msg, len); + if (err) + goto out; + } + + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + len, + info->type, + info->op, + info->flags); + + return pkt; + +out: + kfree(pkt->buf); +out_pkt: + kfree(pkt); + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); + +static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info) +{ + u32 src_cid, src_port, dst_cid, dst_port; + struct virtio_vsock_sock *vvs; + struct virtio_vsock_pkt *pkt; + u32 pkt_len = info->pkt_len; + + src_cid = vm_sockets_get_local_cid(); + src_port = vsk->local_addr.svm_port; + if (!info->remote_cid) { + dst_cid = vsk->remote_addr.svm_cid; + dst_port = vsk->remote_addr.svm_port; + } else { + dst_cid = info->remote_cid; + dst_port = info->remote_port; + } + + vvs = vsk->trans; + + /* we can send less than pkt_len bytes */ + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + + /* virtio_transport_get_credit might return less than pkt_len credit */ + pkt_len = virtio_transport_get_credit(vvs, pkt_len); + + /* Do not send zero length OP_RW pkt */ + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + + pkt = virtio_transport_alloc_pkt(info, pkt_len, + src_cid, src_port, + dst_cid, dst_port); + if (!pkt) { + virtio_transport_put_credit(vvs, pkt_len); + return -ENOMEM; + } + + virtio_transport_inc_tx_pkt(vvs, pkt); + + return virtio_transport_get_ops()->send_pkt(pkt); +} + +static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, + struct virtio_vsock_pkt *pkt) +{ + vvs->rx_bytes += pkt->len; +} + +static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, + struct virtio_vsock_pkt *pkt) +{ + vvs->rx_bytes -= pkt->len; + vvs->fwd_cnt += pkt->len; +} + +void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) +{ + spin_lock_bh(&vvs->tx_lock); + pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); + pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); + spin_unlock_bh(&vvs->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); + +u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) +{ + u32 ret; + + spin_lock_bh(&vvs->tx_lock); + ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + if (ret > credit) + ret = credit; + vvs->tx_cnt += ret; + spin_unlock_bh(&vvs->tx_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_credit); + +void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) +{ + spin_lock_bh(&vvs->tx_lock); + vvs->tx_cnt -= credit; + spin_unlock_bh(&vvs->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_put_credit); + +static int virtio_transport_send_credit_update(struct vsock_sock *vsk, + int type, + struct virtio_vsock_hdr *hdr) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, + .type = type, + .vsk = vsk, + }; + + return virtio_transport_send_pkt_info(vsk, &info); +} + +static ssize_t +virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0; + int err = -EFAULT; + + spin_lock_bh(&vvs->rx_lock); + while (total < len && !list_empty(&vvs->rx_queue)) { + pkt = list_first_entry(&vvs->rx_queue, + struct virtio_vsock_pkt, list); + + bytes = len - total; + if (bytes > pkt->len - pkt->off) + bytes = pkt->len - pkt->off; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); + if (err) + goto out; + + spin_lock_bh(&vvs->rx_lock); + + total += bytes; + pkt->off += bytes; + if (pkt->off == pkt->len) { + virtio_transport_dec_rx_pkt(vvs, pkt); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + } + spin_unlock_bh(&vvs->rx_lock); + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, + NULL); + + return total; + +out: + if (total) + err = total; + return err; +} + +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + return virtio_transport_stream_do_dequeue(vsk, msg, len); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); + +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + s64 bytes; + + spin_lock_bh(&vvs->rx_lock); + bytes = vvs->rx_bytes; + spin_unlock_bh(&vvs->rx_lock); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); + +static s64 virtio_transport_has_space(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + s64 bytes; + + bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + if (bytes < 0) + bytes = 0; + + return bytes; +} + +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + s64 bytes; + + spin_lock_bh(&vvs->tx_lock); + bytes = virtio_transport_has_space(vsk); + spin_unlock_bh(&vvs->tx_lock); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + struct virtio_vsock_sock *vvs; + + vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); + if (!vvs) + return -ENOMEM; + + vsk->trans = vvs; + vvs->vsk = vsk; + if (psk) { + struct virtio_vsock_sock *ptrans = psk->trans; + + vvs->buf_size = ptrans->buf_size; + vvs->buf_size_min = ptrans->buf_size_min; + vvs->buf_size_max = ptrans->buf_size_max; + vvs->peer_buf_alloc = ptrans->peer_buf_alloc; + } else { + vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; + vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; + vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; + } + + vvs->buf_alloc = vvs->buf_size; + + spin_lock_init(&vvs->rx_lock); + spin_lock_init(&vvs->tx_lock); + INIT_LIST_HEAD(&vvs->rx_queue); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); + +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + return vvs->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); + +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + return vvs->buf_size_min; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); + +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + return vvs->buf_size_max; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); + +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < vvs->buf_size_min) + vvs->buf_size_min = val; + if (val > vvs->buf_size_max) + vvs->buf_size_max = val; + vvs->buf_size = val; + vvs->buf_alloc = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); + +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val > vvs->buf_size) + vvs->buf_size = val; + vvs->buf_size_min = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); + +void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < vvs->buf_size) + vvs->buf_size = val; + vvs->buf_size_max = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); + +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + if (vsock_stream_has_data(vsk)) + *data_ready_now = true; + else + *data_ready_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); + +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_avail_now) +{ + s64 free_space; + + free_space = vsock_stream_has_space(vsk); + if (free_space > 0) + *space_avail_now = true; + else if (free_space == 0) + *space_avail_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); + +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); + +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); + +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); + +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); + +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); + +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); + +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + return vvs->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); + +bool virtio_transport_stream_is_active(struct vsock_sock *vsk) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); + +bool virtio_transport_stream_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); + +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); + +bool virtio_transport_dgram_allow(u32 cid, u32 port) +{ + return false; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); + +int virtio_transport_connect(struct vsock_sock *vsk) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_REQUEST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, + }; + + return virtio_transport_send_pkt_info(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_connect); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_SHUTDOWN, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = (mode & RCV_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | + (mode & SEND_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, + }; + + return virtio_transport_send_pkt_info(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_shutdown); + +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t dgram_len) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .msg = msg, + .pkt_len = len, + .vsk = vsk, + }; + + return virtio_transport_send_pkt_info(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); + +void virtio_transport_destruct(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + kfree(vvs); +} +EXPORT_SYMBOL_GPL(virtio_transport_destruct); + +static int virtio_transport_reset(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .reply = !!pkt, + .vsk = vsk, + }; + + /* Send RST only if the original pkt is not a RST pkt */ + if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + return 0; + + return virtio_transport_send_pkt_info(vsk, &info); +} + +/* Normally packets are associated with a socket. There may be no socket if an + * attempt was made to connect to a socket that does not exist. + */ +static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, + .type = le16_to_cpu(pkt->hdr.type), + .reply = true, + }; + + /* Send RST only if the original pkt is not a RST pkt */ + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + return 0; + + pkt = virtio_transport_alloc_pkt(&info, 0, + le64_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + le64_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + if (!pkt) + return -ENOMEM; + + return virtio_transport_get_ops()->send_pkt(pkt); +} + +static void virtio_transport_wait_close(struct sock *sk, long timeout) +{ + if (timeout) { + DEFINE_WAIT(wait); + + do { + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + if (sk_wait_event(sk, &timeout, + sock_flag(sk, SOCK_DONE))) + break; + } while (!signal_pending(current) && timeout); + + finish_wait(sk_sleep(sk), &wait); + } +} + +static void virtio_transport_do_close(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + sk->sk_state_change(sk); + + if (vsk->close_work_scheduled && + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { + vsk->close_work_scheduled = false; + + vsock_remove_sock(vsk); + + /* Release refcnt obtained when we scheduled the timeout */ + sock_put(sk); + } +} + +static void virtio_transport_close_timeout(struct work_struct *work) +{ + struct vsock_sock *vsk = + container_of(work, struct vsock_sock, close_work.work); + struct sock *sk = sk_vsock(vsk); + + sock_hold(sk); + lock_sock(sk); + + if (!sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + + virtio_transport_do_close(vsk, false); + } + + vsk->close_work_scheduled = false; + + release_sock(sk); + sock_put(sk); +} + +/* User context, vsk->sk is locked */ +static bool virtio_transport_close(struct vsock_sock *vsk) +{ + struct sock *sk = &vsk->sk; + + if (!(sk->sk_state == SS_CONNECTED || + sk->sk_state == SS_DISCONNECTING)) + return true; + + /* Already received SHUTDOWN from peer, reply with RST */ + if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { + (void)virtio_transport_reset(vsk, NULL); + return true; + } + + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) + (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); + + if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) + virtio_transport_wait_close(sk, sk->sk_lingertime); + + if (sock_flag(sk, SOCK_DONE)) { + return true; + } + + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->close_work, + virtio_transport_close_timeout); + vsk->close_work_scheduled = true; + schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); + return false; +} + +void virtio_transport_release(struct vsock_sock *vsk) +{ + struct sock *sk = &vsk->sk; + bool remove_sock = true; + + lock_sock(sk); + if (sk->sk_type == SOCK_STREAM) + remove_sock = virtio_transport_close(vsk); + release_sock(sk); + + if (remove_sock) + vsock_remove_sock(vsk); +} +EXPORT_SYMBOL_GPL(virtio_transport_release); + +static int +virtio_transport_recv_connecting(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + int err; + int skerr; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RESPONSE: + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_INVALID: + break; + case VIRTIO_VSOCK_OP_RST: + skerr = ECONNRESET; + err = 0; + goto destroy; + default: + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + return 0; + +destroy: + virtio_transport_reset(vsk, pkt); + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int +virtio_transport_recv_connected(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_vsock_sock *vvs = vsk->trans; + int err = 0; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RW: + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + + spin_lock_bh(&vvs->rx_lock); + virtio_transport_inc_rx_pkt(vvs, pkt); + list_add_tail(&pkt->list, &vvs->rx_queue); + spin_unlock_bh(&vvs->rx_lock); + + sk->sk_data_ready(sk); + return err; + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + sk->sk_write_space(sk); + break; + case VIRTIO_VSOCK_OP_SHUTDOWN: + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) + vsk->peer_shutdown |= RCV_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; + if (vsk->peer_shutdown == SHUTDOWN_MASK && + vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + if (le32_to_cpu(pkt->hdr.flags)) + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_RST: + virtio_transport_do_close(vsk, true); + break; + default: + err = -EINVAL; + break; + } + + virtio_transport_free_pkt(pkt); + return err; +} + +static void +virtio_transport_recv_disconnecting(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + virtio_transport_do_close(vsk, true); +} + +static int +virtio_transport_send_response(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RESPONSE, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .remote_cid = le64_to_cpu(pkt->hdr.src_cid), + .remote_port = le32_to_cpu(pkt->hdr.src_port), + .reply = true, + .vsk = vsk, + }; + + return virtio_transport_send_pkt_info(vsk, &info); +} + +/* Handle server socket */ +static int +virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_sock *vchild; + struct sock *child; + + if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { + virtio_transport_reset(vsk, pkt); + return -EINVAL; + } + + if (sk_acceptq_is_full(sk)) { + virtio_transport_reset(vsk, pkt); + return -ENOMEM; + } + + child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type, 0); + if (!child) { + virtio_transport_reset(vsk, pkt); + return -ENOMEM; + } + + sk->sk_ack_backlog++; + + lock_sock_nested(child, SINGLE_DEPTH_NESTING); + + child->sk_state = SS_CONNECTED; + + vchild = vsock_sk(child); + vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port)); + vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + + vsock_insert_connected(vchild); + vsock_enqueue_accept(sk, child); + virtio_transport_send_response(vchild, pkt); + + release_sock(child); + + sk->sk_data_ready(sk); + return 0; +} + +static bool virtio_transport_space_update(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_vsock_sock *vvs = vsk->trans; + bool space_available; + + /* buf_alloc and fwd_cnt is always included in the hdr */ + spin_lock_bh(&vvs->tx_lock); + vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); + vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); + space_available = virtio_transport_has_space(vsk); + spin_unlock_bh(&vvs->tx_lock); + return space_available; +} + +/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex + * lock. + */ +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) +{ + struct sockaddr_vm src, dst; + struct vsock_sock *vsk; + struct sock *sk; + bool space_available; + + vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port)); + + trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, + dst.svm_cid, dst.svm_port, + le32_to_cpu(pkt->hdr.len), + le16_to_cpu(pkt->hdr.type), + le16_to_cpu(pkt->hdr.op), + le32_to_cpu(pkt->hdr.flags), + le32_to_cpu(pkt->hdr.buf_alloc), + le32_to_cpu(pkt->hdr.fwd_cnt)); + + if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { + (void)virtio_transport_reset_no_sock(pkt); + goto free_pkt; + } + + /* The socket must be in connected or bound table + * otherwise send reset back + */ + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + (void)virtio_transport_reset_no_sock(pkt); + goto free_pkt; + } + } + + vsk = vsock_sk(sk); + + space_available = virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + + /* Update CID in case it has changed after a transport reset event */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (space_available) + sk->sk_write_space(sk); + + switch (sk->sk_state) { + case VSOCK_SS_LISTEN: + virtio_transport_recv_listen(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTING: + virtio_transport_recv_connecting(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTED: + virtio_transport_recv_connected(sk, pkt); + break; + case SS_DISCONNECTING: + virtio_transport_recv_disconnecting(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of the + * bound or connected list. + */ + sock_put(sk); + return; + +free_pkt: + virtio_transport_free_pkt(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); + +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) +{ + kfree(pkt->buf); + kfree(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("common code for virtio vsock"); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 589c8b9908a5..102bf9194662 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -272,6 +272,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, false); } +static int +vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + int err; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + static int vmci_transport_send_control_pkt(struct sock *sk, enum vmci_transport_packet_type type, @@ -281,9 +306,7 @@ vmci_transport_send_control_pkt(struct sock *sk, u16 proto, struct vmci_handle handle) { - struct vmci_transport_packet *pkt; struct vsock_sock *vsk; - int err; vsk = vsock_sk(sk); @@ -293,17 +316,10 @@ vmci_transport_send_control_pkt(struct sock *sk, if (!vsock_addr_bound(&vsk->remote_addr)) return -EINVAL; - pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return -ENOMEM; - - err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, - &vsk->remote_addr, type, size, - mode, wait, proto, handle, - true); - kfree(pkt); - - return err; + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, + &vsk->remote_addr, + type, size, mode, + wait, proto, handle); } static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, @@ -321,12 +337,29 @@ static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, static int vmci_transport_send_reset(struct sock *sk, struct vmci_transport_packet *pkt) { + struct sockaddr_vm *dst_ptr; + struct sockaddr_vm dst; + struct vsock_sock *vsk; + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) return 0; - return vmci_transport_send_control_pkt(sk, - VMCI_TRANSPORT_PACKET_TYPE_RST, - 0, 0, NULL, VSOCK_PROTO_INVALID, - VMCI_INVALID_HANDLE); + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (vsock_addr_bound(&vsk->remote_addr)) { + dst_ptr = &vsk->remote_addr; + } else { + vsock_addr_init(&dst, pkt->dg.src.context, + pkt->src_port); + dst_ptr = &dst; + } + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); } static int vmci_transport_send_negotiate(struct sock *sk, size_t size) @@ -1623,6 +1656,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ @@ -1643,6 +1680,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk) static void vmci_transport_release(struct vsock_sock *vsk) { + vsock_remove_sock(vsk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; @@ -1734,11 +1773,8 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; + return err; dg = (struct vmci_datagram *)skb->data; if (!dg) @@ -2053,7 +2089,7 @@ static u32 vmci_transport_get_local_cid(void) return vmci_get_context_id(); } -static struct vsock_transport vmci_transport = { +static const struct vsock_transport vmci_transport = { .init = vmci_transport_socket_init, .destruct = vmci_transport_destruct, .release = vmci_transport_release, @@ -2153,7 +2189,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.3.0-k"); +MODULE_VERSION("1.0.4.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9efc0c1426ec..db65d94987c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11721,7 +11721,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + len, gfp); if (!msg) return; @@ -11873,7 +11873,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp); if (!msg) return; @@ -11913,7 +11913,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp); if (!msg) return; @@ -11951,7 +11951,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(100 + ie_len, GFP_KERNEL); if (!msg) return; @@ -12028,7 +12028,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, trace_cfg80211_notify_new_peer_candidate(dev, addr); - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + ie_len, gfp); if (!msg) return; @@ -12397,7 +12397,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + len, gfp); if (!msg) return -ENOMEM; @@ -12440,7 +12440,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + msg = nlmsg_new(100 + len, gfp); if (!msg) return; @@ -13244,7 +13244,7 @@ void cfg80211_ft_event(struct net_device *netdev, if (!ft_event->target_ap) return; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(100 + ft_event->ric_ies_len, GFP_KERNEL); if (!msg) return; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 50dffd183cc6..429abf421906 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -780,7 +780,7 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), * however it is safe for now to assume that a frequency rule should not be * part of a frequency's band if the start freq or end freq are off by more - * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". @@ -795,7 +795,7 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * with the Channel starting frequency above 45 GHz. */ u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? - 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; if (abs(freq_khz - freq_range->end_freq_khz) <= limit) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c6ab4da4b8e2..5dca42dbc737 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } @@ -680,8 +678,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; int len, i, rc = 0; - if (!sock_flag(sk, SOCK_ZAPPED) || - addr_len != sizeof(struct sockaddr_x25) || + if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25) { rc = -EINVAL; goto out; @@ -696,9 +693,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } lock_sock(sk); - x25_sk(sk)->source_addr = addr->sx25_addr; - x25_insert_socket(sk); - sock_reset_flag(sk, SOCK_ZAPPED); + if (sock_flag(sk, SOCK_ZAPPED)) { + x25_sk(sk)->source_addr = addr->sx25_addr; + x25_insert_socket(sk); + sock_reset_flag(sk, SOCK_ZAPPED); + } else { + rc = -EINVAL; + } release_sock(sk); SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); out: @@ -814,8 +815,13 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) + if (rc) { + read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + x25->state = X25_STATE_0; + } out_put_route: x25_route_put(rt); out: diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 99e013f3a88d..2e48d0b1ddd0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -623,7 +623,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); si->sadcnt = net->xfrm.state_num; - si->sadhcnt = net->xfrm.state_hmask; + si->sadhcnt = net->xfrm.state_hmask + 1; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&net->xfrm.xfrm_state_lock); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 634b4f85090a..9d57500bbbbc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1412,10 +1412,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL; diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index dd8397894d5c..12a6940741fe 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -46,8 +46,8 @@ my (@stack, $re, $dre, $x, $xs, $funcre); $xs = "[0-9a-f ]"; # hex character or space $funcre = qr/^$x* <(.*)>:$/; if ($arch eq 'aarch64') { - #ffffffc0006325cc: a9bb7bfd stp x29, x30, [sp,#-80]! - $re = qr/^.*stp.*sp,\#-([0-9]{1,8})\]\!/o; + #ffffffc0006325cc: a9bb7bfd stp x29, x30, [sp, #-80]! + $re = qr/^.*stp.*sp, \#-([0-9]{1,8})\]\!/o; } elsif ($arch eq 'arm') { #c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64 $re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o; diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 00d6d53c2681..ffc46c7c3afb 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -64,7 +64,7 @@ parse_symbol() { fi # Strip out the base of the path - code=${code//$basepath/""} + code=${code//^$basepath/""} # In the case of inlines, move everything to same line code=${code//$'\n'/' '} diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index c410d257da06..0c7800112ff5 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -71,7 +71,7 @@ static void warn_ignored_character(char chr) { fprintf(stderr, "%s:%d:warning: ignoring unsupported character '%c'\n", - zconf_curname(), zconf_lineno(), chr); + current_file->name, yylineno, chr); } %} @@ -191,6 +191,8 @@ n [A-Za-z0-9_-] } <> { BEGIN(INITIAL); + yylval.string = text; + return T_WORD_QUOTE; } } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 9f5cdd49ff0b..4250d3d6f391 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -47,49 +47,9 @@ typedef struct { struct devtable { const char *device_id; /* name of table, __mod___*_device_table. */ unsigned long id_size; - void *function; + int (*do_entry)(const char *filename, void *symval, char *alias); }; -#define ___cat(a,b) a ## b -#define __cat(a,b) ___cat(a,b) - -/* we need some special handling for this host tool running eventually on - * Darwin. The Mach-O section handling is a bit different than ELF section - * handling. The differnces in detail are: - * a) we have segments which have sections - * b) we need a API call to get the respective section symbols */ -#if defined(__MACH__) -#include - -#define INIT_SECTION(name) do { \ - unsigned long name ## _len; \ - char *__cat(pstart_,name) = getsectdata("__TEXT", \ - #name, &__cat(name,_len)); \ - char *__cat(pstop_,name) = __cat(pstart_,name) + \ - __cat(name, _len); \ - __cat(__start_,name) = (void *)__cat(pstart_,name); \ - __cat(__stop_,name) = (void *)__cat(pstop_,name); \ - } while (0) -#define SECTION(name) __attribute__((section("__TEXT, " #name))) - -struct devtable **__start___devtable, **__stop___devtable; -#else -#define INIT_SECTION(name) /* no-op for ELF */ -#define SECTION(name) __attribute__((section(#name))) - -/* We construct a table of pointers in an ELF section (pointers generally - * go unpadded by gcc). ld creates boundary syms for us. */ -extern struct devtable *__start___devtable[], *__stop___devtable[]; -#endif /* __MACH__ */ - -#if !defined(__used) -# if __GNUC__ == 3 && __GNUC_MINOR__ < 3 -# define __used __attribute__((__unused__)) -# else -# define __used __attribute__((__used__)) -# endif -#endif - /* Define a variable f that holds the value of field f of struct devid * based at address m. */ @@ -102,16 +62,6 @@ extern struct devtable *__start___devtable[], *__stop___devtable[]; #define DEF_FIELD_ADDR(m, devid, f) \ typeof(((struct devid *)0)->f) *f = ((m) + OFF_##devid##_##f) -/* Add a table entry. We test function type matches while we're here. */ -#define ADD_TO_DEVTABLE(device_id, type, function) \ - static struct devtable __cat(devtable,__LINE__) = { \ - device_id + 0*sizeof((function)((const char *)NULL, \ - (void *)NULL, \ - (char *)NULL)), \ - SIZE_##type, (function) }; \ - static struct devtable *SECTION(__devtable) __used \ - __cat(devtable_ptr,__LINE__) = &__cat(devtable,__LINE__) - #define ADD(str, sep, cond, field) \ do { \ strcat(str, sep); \ @@ -388,7 +338,6 @@ static int do_hid_entry(const char *filename, return 1; } -ADD_TO_DEVTABLE("hid", hid_device_id, do_hid_entry); /* Looks like: ieee1394:venNmoNspNverN */ static int do_ieee1394_entry(const char *filename, @@ -413,7 +362,6 @@ static int do_ieee1394_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ieee1394", ieee1394_device_id, do_ieee1394_entry); /* Looks like: pci:vNdNsvNsdNbcNscNiN. */ static int do_pci_entry(const char *filename, @@ -457,7 +405,6 @@ static int do_pci_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("pci", pci_device_id, do_pci_entry); /* looks like: "ccw:tNmNdtNdmN" */ static int do_ccw_entry(const char *filename, @@ -481,7 +428,6 @@ static int do_ccw_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ccw", ccw_device_id, do_ccw_entry); /* looks like: "ap:tN" */ static int do_ap_entry(const char *filename, @@ -492,7 +438,6 @@ static int do_ap_entry(const char *filename, sprintf(alias, "ap:t%02X*", dev_type); return 1; } -ADD_TO_DEVTABLE("ap", ap_device_id, do_ap_entry); /* looks like: "css:tN" */ static int do_css_entry(const char *filename, @@ -503,7 +448,6 @@ static int do_css_entry(const char *filename, sprintf(alias, "css:t%01X", type); return 1; } -ADD_TO_DEVTABLE("css", css_device_id, do_css_entry); /* Looks like: "serio:tyNprNidNexN" */ static int do_serio_entry(const char *filename, @@ -523,7 +467,6 @@ static int do_serio_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); /* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or * "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if) @@ -561,7 +504,6 @@ static int do_acpi_entry(const char *filename, } return 1; } -ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); /* looks like: "pnp:dD" */ static void do_pnp_device_entry(void *symval, unsigned long size, @@ -682,7 +624,6 @@ static int do_pcmcia_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("pcmcia", pcmcia_device_id, do_pcmcia_entry); static int do_of_entry (const char *filename, void *symval, char *alias) { @@ -707,7 +648,6 @@ static int do_of_entry (const char *filename, void *symval, char *alias) add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); static int do_vio_entry(const char *filename, void *symval, char *alias) @@ -727,7 +667,6 @@ static int do_vio_entry(const char *filename, void *symval, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("vio", vio_device_id, do_vio_entry); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -800,7 +739,6 @@ static int do_input_entry(const char *filename, void *symval, do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); return 1; } -ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); static int do_eisa_entry(const char *filename, void *symval, char *alias) @@ -812,7 +750,6 @@ static int do_eisa_entry(const char *filename, void *symval, strcat(alias, "*"); return 1; } -ADD_TO_DEVTABLE("eisa", eisa_device_id, do_eisa_entry); /* Looks like: parisc:tNhvNrevNsvN */ static int do_parisc_entry(const char *filename, void *symval, @@ -832,7 +769,6 @@ static int do_parisc_entry(const char *filename, void *symval, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("parisc", parisc_device_id, do_parisc_entry); /* Looks like: sdio:cNvNdN. */ static int do_sdio_entry(const char *filename, @@ -849,7 +785,6 @@ static int do_sdio_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("sdio", sdio_device_id, do_sdio_entry); /* Looks like: ssb:vNidNrevN. */ static int do_ssb_entry(const char *filename, @@ -866,7 +801,6 @@ static int do_ssb_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ssb", ssb_device_id, do_ssb_entry); /* Looks like: bcma:mNidNrevNclN. */ static int do_bcma_entry(const char *filename, @@ -885,7 +819,6 @@ static int do_bcma_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("bcma", bcma_device_id, do_bcma_entry); /* Looks like: virtio:dNvN */ static int do_virtio_entry(const char *filename, void *symval, @@ -901,7 +834,6 @@ static int do_virtio_entry(const char *filename, void *symval, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("virtio", virtio_device_id, do_virtio_entry); /* * Looks like: vmbus:guid @@ -924,7 +856,6 @@ static int do_vmbus_entry(const char *filename, void *symval, return 1; } -ADD_TO_DEVTABLE("vmbus", hv_vmbus_device_id, do_vmbus_entry); /* Looks like: i2c:S */ static int do_i2c_entry(const char *filename, void *symval, @@ -935,7 +866,6 @@ static int do_i2c_entry(const char *filename, void *symval, return 1; } -ADD_TO_DEVTABLE("i2c", i2c_device_id, do_i2c_entry); /* Looks like: spi:S */ static int do_spi_entry(const char *filename, void *symval, @@ -946,7 +876,6 @@ static int do_spi_entry(const char *filename, void *symval, return 1; } -ADD_TO_DEVTABLE("spi", spi_device_id, do_spi_entry); static const struct dmifield { const char *prefix; @@ -1001,7 +930,6 @@ static int do_dmi_entry(const char *filename, void *symval, strcat(alias, ":"); return 1; } -ADD_TO_DEVTABLE("dmi", dmi_system_id, do_dmi_entry); static int do_platform_entry(const char *filename, void *symval, char *alias) @@ -1010,7 +938,6 @@ static int do_platform_entry(const char *filename, sprintf(alias, PLATFORM_MODULE_PREFIX "%s", *name); return 1; } -ADD_TO_DEVTABLE("platform", platform_device_id, do_platform_entry); static int do_mdio_entry(const char *filename, void *symval, char *alias) @@ -1035,7 +962,6 @@ static int do_mdio_entry(const char *filename, return 1; } -ADD_TO_DEVTABLE("mdio", mdio_device_id, do_mdio_entry); /* Looks like: zorro:iN. */ static int do_zorro_entry(const char *filename, void *symval, @@ -1046,7 +972,6 @@ static int do_zorro_entry(const char *filename, void *symval, ADD(alias, "i", id != ZORRO_WILDCARD, id); return 1; } -ADD_TO_DEVTABLE("zorro", zorro_device_id, do_zorro_entry); /* looks like: "pnp:dD" */ static int do_isapnp_entry(const char *filename, @@ -1062,7 +987,6 @@ static int do_isapnp_entry(const char *filename, (function >> 12) & 0x0f, (function >> 8) & 0x0f); return 1; } -ADD_TO_DEVTABLE("isapnp", isapnp_device_id, do_isapnp_entry); /* Looks like: "ipack:fNvNdN". */ static int do_ipack_entry(const char *filename, @@ -1078,7 +1002,6 @@ static int do_ipack_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ipack", ipack_device_id, do_ipack_entry); /* * Append a match expression for a single masked hex digit. @@ -1149,7 +1072,6 @@ static int do_amba_entry(const char *filename, return 1; } -ADD_TO_DEVTABLE("amba", amba_id, do_amba_entry); /* * looks like: "mipscdmm:tN" @@ -1165,7 +1087,6 @@ static int do_mips_cdmm_entry(const char *filename, sprintf(alias, "mipscdmm:t%02X*", type); return 1; } -ADD_TO_DEVTABLE("mipscdmm", mips_cdmm_device_id, do_mips_cdmm_entry); /* LOOKS like cpu:type:x86,venVVVVfamFFFFmodMMMM:feature:*,FEAT,* * All fields are numbers. It would be nicer to use strings for vendor @@ -1190,7 +1111,6 @@ static int do_x86cpu_entry(const char *filename, void *symval, sprintf(alias + strlen(alias), "%04X*", feature); return 1; } -ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); /* LOOKS like cpu:type:*:feature:*FEAT* */ static int do_cpu_entry(const char *filename, void *symval, char *alias) @@ -1200,7 +1120,6 @@ static int do_cpu_entry(const char *filename, void *symval, char *alias) sprintf(alias, "cpu:type:*:feature:*%04X*", feature); return 1; } -ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); /* Looks like: mei:S:uuid:N:* */ static int do_mei_entry(const char *filename, void *symval, @@ -1219,7 +1138,6 @@ static int do_mei_entry(const char *filename, void *symval, return 1; } -ADD_TO_DEVTABLE("mei", mei_cl_device_id, do_mei_entry); /* Looks like: rapidio:vNdNavNadN */ static int do_rio_entry(const char *filename, @@ -1239,7 +1157,6 @@ static int do_rio_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("rapidio", rio_device_id, do_rio_entry); /* Looks like: ulpi:vNpN */ static int do_ulpi_entry(const char *filename, void *symval, @@ -1252,7 +1169,6 @@ static int do_ulpi_entry(const char *filename, void *symval, return 1; } -ADD_TO_DEVTABLE("ulpi", ulpi_device_id, do_ulpi_entry); /* Looks like: hdaudio:vNrNaN */ static int do_hda_entry(const char *filename, void *symval, char *alias) @@ -1269,7 +1185,6 @@ static int do_hda_entry(const char *filename, void *symval, char *alias) add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("hdaudio", hda_device_id, do_hda_entry); /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) @@ -1283,12 +1198,11 @@ static bool sym_is(const char *name, unsigned namelen, const char *symbol) static void do_table(void *symval, unsigned long size, unsigned long id_size, const char *device_id, - void *function, + int (*do_entry)(const char *filename, void *symval, char *alias), struct module *mod) { unsigned int i; char alias[500]; - int (*do_entry)(const char *, void *entry, char *alias) = function; device_id_check(mod->name, device_id, size, id_size, symval); /* Leave last one: it's the terminator. */ @@ -1302,6 +1216,44 @@ static void do_table(void *symval, unsigned long size, } } +static const struct devtable devtable[] = { + {"hid", SIZE_hid_device_id, do_hid_entry}, + {"ieee1394", SIZE_ieee1394_device_id, do_ieee1394_entry}, + {"pci", SIZE_pci_device_id, do_pci_entry}, + {"ccw", SIZE_ccw_device_id, do_ccw_entry}, + {"ap", SIZE_ap_device_id, do_ap_entry}, + {"css", SIZE_css_device_id, do_css_entry}, + {"serio", SIZE_serio_device_id, do_serio_entry}, + {"acpi", SIZE_acpi_device_id, do_acpi_entry}, + {"pcmcia", SIZE_pcmcia_device_id, do_pcmcia_entry}, + {"vio", SIZE_vio_device_id, do_vio_entry}, + {"input", SIZE_input_device_id, do_input_entry}, + {"eisa", SIZE_eisa_device_id, do_eisa_entry}, + {"parisc", SIZE_parisc_device_id, do_parisc_entry}, + {"sdio", SIZE_sdio_device_id, do_sdio_entry}, + {"ssb", SIZE_ssb_device_id, do_ssb_entry}, + {"bcma", SIZE_bcma_device_id, do_bcma_entry}, + {"virtio", SIZE_virtio_device_id, do_virtio_entry}, + {"vmbus", SIZE_hv_vmbus_device_id, do_vmbus_entry}, + {"i2c", SIZE_i2c_device_id, do_i2c_entry}, + {"spi", SIZE_spi_device_id, do_spi_entry}, + {"dmi", SIZE_dmi_system_id, do_dmi_entry}, + {"platform", SIZE_platform_device_id, do_platform_entry}, + {"mdio", SIZE_mdio_device_id, do_mdio_entry}, + {"zorro", SIZE_zorro_device_id, do_zorro_entry}, + {"isapnp", SIZE_isapnp_device_id, do_isapnp_entry}, + {"ipack", SIZE_ipack_device_id, do_ipack_entry}, + {"amba", SIZE_amba_id, do_amba_entry}, + {"mipscdmm", SIZE_mips_cdmm_device_id, do_mips_cdmm_entry}, + {"x86cpu", SIZE_x86_cpu_id, do_x86cpu_entry}, + {"cpu", SIZE_cpu_feature, do_cpu_entry}, + {"mei", SIZE_mei_cl_device_id, do_mei_entry}, + {"rapidio", SIZE_rio_device_id, do_rio_entry}, + {"ulpi", SIZE_ulpi_device_id, do_ulpi_entry}, + {"hdaudio", SIZE_hda_device_id, do_hda_entry}, + {"of", SIZE_of_device_id, do_of_entry}, +}; + /* Create MODULE_ALIAS() statements. * At this time, we cannot write the actual output C source yet, * so we write into the mod->dev_table_buf buffer. */ @@ -1354,13 +1306,14 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, else if (sym_is(name, namelen, "pnp_card")) do_pnp_card_entries(symval, sym->st_size, mod); else { - struct devtable **p; - INIT_SECTION(__devtable); + int i; - for (p = __start___devtable; p < __stop___devtable; p++) { - if (sym_is(name, namelen, (*p)->device_id)) { - do_table(symval, sym->st_size, (*p)->id_size, - (*p)->device_id, (*p)->function, mod); + for (i = 0; i < ARRAY_SIZE(devtable); i++) { + const struct devtable *p = &devtable[i]; + + if (sym_is(name, namelen, p->device_id)) { + do_table(symval, sym->st_size, p->id_size, + p->device_id, p->do_entry, mod); break; } } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 064fbfbbb22c..81b1c02a76fa 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1197,6 +1197,30 @@ static int secref_whitelist(const struct sectioncheck *mismatch, return 1; } +static inline int is_arm_mapping_symbol(const char *str) +{ + return str[0] == '$' && strchr("axtd", str[1]) + && (str[2] == '\0' || str[2] == '.'); +} + +/* + * If there's no name there, ignore it; likewise, ignore it if it's + * one of the magic symbols emitted used by current ARM tools. + * + * Otherwise if find_symbols_between() returns those symbols, they'll + * fail the whitelist tests and cause lots of false alarms ... fixable + * only by merging __exit and __init sections into __text, bloating + * the kernel (which is especially evil on embedded platforms). + */ +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +{ + const char *name = elf->strtab + sym->st_name; + + if (!name || !strlen(name)) + return 0; + return !is_arm_mapping_symbol(name); +} + /** * Find symbol based on relocation record info. * In some cases the symbol supplied is a valid symbol so @@ -1222,6 +1246,8 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, continue; if (ELF_ST_TYPE(sym->st_info) == STT_SECTION) continue; + if (!is_valid_name(elf, sym)) + continue; if (sym->st_value == addr) return sym; /* Find a symbol nearby - addr are maybe negative */ @@ -1240,30 +1266,6 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, return NULL; } -static inline int is_arm_mapping_symbol(const char *str) -{ - return str[0] == '$' && strchr("axtd", str[1]) - && (str[2] == '\0' || str[2] == '.'); -} - -/* - * If there's no name there, ignore it; likewise, ignore it if it's - * one of the magic symbols emitted used by current ARM tools. - * - * Otherwise if find_symbols_between() returns those symbols, they'll - * fail the whitelist tests and cause lots of false alarms ... fixable - * only by merging __exit and __init sections into __text, bloating - * the kernel (which is especially evil on embedded platforms). - */ -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) -{ - const char *name = elf->strtab + sym->st_name; - - if (!name || !strlen(name)) - return 0; - return !is_arm_mapping_symbol(name); -} - /* * Find symbols before or equal addr and after addr - in the section sec. * If we find two symbols with equal offset prefer one with a valid name. diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 63d91e22ed7c..966dd3924ea9 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -143,7 +143,7 @@ fi if test -e include/config/auto.conf; then . include/config/auto.conf else - echo "Error: kernelrelease not valid - run 'make prepare' to update it" + echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2 exit 1 fi diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 03c1652c9a1f..db3bdc91c520 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -568,7 +568,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) - break; + return rc; } else { /* * in the other possible cases: diff --git a/security/keys/key.c b/security/keys/key.c index 4d971bf88ac3..03160f1f1aa2 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -260,8 +260,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, spin_lock(&user->lock); if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { - if (user->qnkeys + 1 >= maxkeys || - user->qnbytes + quotalen >= maxbytes || + if (user->qnkeys + 1 > maxkeys || + user->qnbytes + quotalen > maxbytes || user->qnbytes + quotalen < user->qnbytes) goto no_quota; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d5264f950ce1..737e60b3d4bd 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -628,9 +628,6 @@ static bool search_nested_keyrings(struct key *keyring, BUG_ON((ctx->flags & STATE_CHECKS) == 0 || (ctx->flags & STATE_CHECKS) == STATE_CHECKS); - if (ctx->index_key.description) - ctx->index_key.desc_len = strlen(ctx->index_key.description); - /* Check to see if this top-level keyring is what we are looking for * and whether it is valid or not. */ @@ -888,6 +885,7 @@ key_ref_t keyring_search(key_ref_t keyring, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/proc.c b/security/keys/proc.c index 036128682463..f2c7e090a66d 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -186,9 +186,8 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; struct keyring_search_context ctx = { - .index_key.type = key->type, - .index_key.description = key->description, - .cred = current_cred(), + .index_key = key->index_key, + .cred = m->file->f_cred, .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, @@ -208,11 +207,7 @@ static int proc_keys_show(struct seq_file *m, void *v) } } - /* check whether the current task is allowed to view the key (assuming - * non-possession) - * - the caller holds a spinlock, and thus the RCU read lock, making our - * access to __current_cred() safe - */ + /* check whether the current task is allowed to view the key */ rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW); if (rc < 0) return 0; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3ae3acf473c8..88172c163953 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -544,6 +544,7 @@ struct key *request_key_and_link(struct key_type *type, struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, + .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 217775fcd0f3..8882b729924d 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -254,7 +254,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) struct key *authkey; key_ref_t authkey_ref; - sprintf(description, "%x", target_id); + ctx.index_key.desc_len = sprintf(description, "%x", target_id); authkey_ref = search_process_keyrings(&ctx); diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 45d927ab807d..d0b74c12d56d 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -308,6 +308,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, if (a->u.net->sk) { struct sock *sk = a->u.net->sk; struct unix_sock *u; + struct unix_address *addr; int len = 0; char *p = NULL; @@ -338,14 +339,15 @@ static void dump_common_audit_data(struct audit_buffer *ab, #endif case AF_UNIX: u = unix_sk(sk); + addr = smp_load_acquire(&u->addr); + if (!addr) + break; if (u->path.dentry) { audit_log_d_path(ab, " path=", &u->path); break; } - if (!u->addr) - break; - len = u->addr->len-sizeof(short); - p = &u->addr->name->sun_path[0]; + len = addr->len-sizeof(short); + p = &addr->name->sun_path[0]; audit_log_format(ab, " path="); if (*p) audit_log_untrustedstring(ab, p); diff --git a/security/security.c b/security/security.c index ae05ab153c5a..42c4cb0cb122 100644 --- a/security/security.c +++ b/security/security.c @@ -862,6 +862,13 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) void security_cred_free(struct cred *cred) { + /* + * There is a failure case in prepare_creds() that + * may result in a call here with ->security being NULL. + */ + if (unlikely(cred->security == NULL)) + return; + call_void_hook(cred_free, cred); } diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 992a31530825..965a55eacaba 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -726,7 +726,8 @@ static int sens_destroy(void *key, void *datum, void *p) kfree(key); if (datum) { levdatum = datum; - ebitmap_destroy(&levdatum->level->cat); + if (levdatum->level) + ebitmap_destroy(&levdatum->level->cat); kfree(levdatum->level); } kfree(datum); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c73361859d11..9db7c80a74aa 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4311,6 +4311,12 @@ static int smack_key_permission(key_ref_t key_ref, int request = 0; int rc; + /* + * Validate requested permissions + */ + if (perm & ~KEY_NEED_ALL) + return -EINVAL; + keyp = key_ref_to_ptr(key_ref); if (keyp == NULL) return -EINVAL; @@ -4330,10 +4336,10 @@ static int smack_key_permission(key_ref_t key_ref, ad.a.u.key_struct.key = keyp->serial; ad.a.u.key_struct.key_desc = keyp->description; #endif - if (perm & KEY_NEED_READ) - request = MAY_READ; + if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW)) + request |= MAY_READ; if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR)) - request = MAY_WRITE; + request |= MAY_WRITE; rc = smk_access(tkp, keyp->security, request, &ad); rc = smk_bu_note("key access", tkp, keyp->security, request, rc); return rc; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 38651454ed08..6f388e77999c 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -874,7 +874,8 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, } /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */ #ifdef CONFIG_MMU - if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0) + if (get_user_pages(current, bprm->mm, pos, 1, + FOLL_FORCE, &page, NULL) <= 0) return false; #else page = bprm->page[pos / PAGE_SIZE]; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index cb6ed10816d4..0a8808954bd8 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -288,7 +288,9 @@ static int yama_ptrace_access_check(struct task_struct *child, break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); - if (!task_is_descendant(current, child) && + if (!pid_alive(child)) + rc = -EPERM; + if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 6163bf3e8177..3c88a3384064 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -500,7 +501,8 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size) + params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments == 0) return -EINVAL; /* now codec parameters */ @@ -858,6 +860,15 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return retval; } +/* support of 32bit userspace on 64bit platforms */ +#ifdef CONFIG_COMPAT +static long snd_compr_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return snd_compr_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + static const struct file_operations snd_compr_file_ops = { .owner = THIS_MODULE, .open = snd_compr_open, @@ -865,6 +876,9 @@ static const struct file_operations snd_compr_file_ops = { .write = snd_compr_write, .read = snd_compr_read, .unlocked_ioctl = snd_compr_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = snd_compr_ioctl_compat, +#endif .mmap = snd_compr_mmap, .poll = snd_compr_poll, }; diff --git a/sound/core/info.c b/sound/core/info.c index 8ab72e0f5932..358a6947342d 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -724,8 +724,11 @@ snd_info_create_entry(const char *name, struct snd_info_entry *parent) INIT_LIST_HEAD(&entry->children); INIT_LIST_HEAD(&entry->list); entry->parent = parent; - if (parent) + if (parent) { + mutex_lock(&parent->access); list_add_tail(&entry->list, &parent->children); + mutex_unlock(&parent->access); + } return entry; } @@ -809,7 +812,12 @@ void snd_info_free_entry(struct snd_info_entry * entry) list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); - list_del(&entry->list); + p = entry->parent; + if (p) { + mutex_lock(&p->access); + list_del(&entry->list); + mutex_unlock(&p->access); + } kfree(entry->name); if (entry->private_free) entry->private_free(entry); diff --git a/sound/core/init.c b/sound/core/init.c index 20f37fb3800e..67765c61e5d5 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -405,14 +405,7 @@ int snd_card_disconnect(struct snd_card *card) card->shutdown = 1; spin_unlock(&card->files_lock); - /* phase 1: disable fops (user space) operations for ALSA API */ - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - clear_bit(card->number, snd_cards_lock); - mutex_unlock(&snd_card_mutex); - - /* phase 2: replace file->f_op with special dummy operations */ - + /* replace file->f_op with special dummy operations */ spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ @@ -428,7 +421,7 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); - /* phase 3: notify all connected devices about disconnection */ + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) @@ -444,6 +437,13 @@ int snd_card_disconnect(struct snd_card *card) device_del(&card->card_dev); card->registered = false; } + + /* disable fops (user space) operations for ALSA API */ + mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; + clear_bit(card->number, snd_cards_lock); + mutex_unlock(&snd_card_mutex); + #ifdef CONFIG_PM wake_up(&card->power_sleep); #endif diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 07feb35f1935..443bb8ce8255 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -950,6 +950,28 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; + err = snd_pcm_oss_period_size(substream, params, sparams); + if (err < 0) + goto failure; + + n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); + if (err < 0) + goto failure; + + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->oss.periods, NULL); + if (err < 0) + goto failure; + + snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + + err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); + if (err < 0) { + pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); + goto failure; + } + #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { @@ -984,27 +1006,6 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) } #endif - err = snd_pcm_oss_period_size(substream, params, sparams); - if (err < 0) - goto failure; - - n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); - if (err < 0) - goto failure; - - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, - runtime->oss.periods, NULL); - if (err < 0) - goto failure; - - snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); - - if ((err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams)) < 0) { - pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); - goto failure; - } - if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 6bda8f6c5f84..cdff5f976480 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -125,6 +126,7 @@ static int snd_pcm_control_ioctl(struct snd_card *card, return -EFAULT; if (stream < 0 || stream > 1) return -EINVAL; + stream = array_index_nospec(stream, 2); if (get_user(subdevice, &info->subdevice)) return -EFAULT; mutex_lock(®ister_mutex); diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 5bc7ddf8fc70..3ce2b8771762 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1849,8 +1849,6 @@ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { switch (cmd) { - case SNDRV_PCM_IOCTL1_INFO: - return 0; case SNDRV_PCM_IOCTL1_RESET: return snd_pcm_lib_ioctl_reset(substream, arg); case SNDRV_PCM_IOCTL1_CHANNEL_INFO: diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 448953cd5ec1..09b74f73952f 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -214,11 +214,7 @@ int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info) info->subdevices_avail = pstr->substream_count - pstr->substream_opened; strlcpy(info->subname, substream->name, sizeof(info->subname)); runtime = substream->runtime; - /* AB: FIXME!!! This is definitely nonsense */ - if (runtime) { - info->sync = runtime->sync; - substream->ops->ioctl(substream, SNDRV_PCM_IOCTL1_INFO, info); - } + return 0; } @@ -1280,8 +1276,15 @@ static int snd_pcm_pause(struct snd_pcm_substream *substream, int push) static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_SUSPENDED) + switch (runtime->status->state) { + case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; + /* unresumable PCM state; return -EBUSY for skipping suspend */ + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_DISCONNECTED: + return -EBUSY; + } runtime->trigger_master = substream; return 0; } @@ -1361,6 +1364,14 @@ int snd_pcm_suspend_all(struct snd_pcm *pcm) /* FIXME: the open/close code should lock this as well */ if (substream->runtime == NULL) continue; + + /* + * Skip BE dai link PCM's that are internal and may + * not have their substream ops set. + */ + if (!substream->ops) + continue; + err = snd_pcm_suspend(substream); if (err < 0 && err != -EBUSY) return err; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 59111cadaec2..c8b2309352d7 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -591,6 +592,7 @@ static int __snd_rawmidi_info_select(struct snd_card *card, return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; + info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index ea545f9291b4..df5b984bb33f 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -617,13 +617,14 @@ int snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_info *inf) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev); - if (dev < 0 || dev >= dp->max_synthdev) + if (!info) return -ENXIO; - if (dp->synths[dev].is_midi) { + if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); + snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 73ee8476584d..0d0e0c2651c2 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1249,7 +1249,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, /* fill the info fields */ if (client_info.name[0]) - strlcpy(client->name, client_info.name, sizeof(client->name)); + strscpy(client->name, client_info.name, sizeof(client->name)); client->filter = client_info.filter; client->event_lost = client_info.event_lost; @@ -1558,7 +1558,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, /* set queue name */ if (! info.name[0]) snprintf(info.name, sizeof(info.name), "Queue-%d", q->queue); - strlcpy(q->name, info.name, sizeof(q->name)); + strscpy(q->name, info.name, sizeof(q->name)); snd_use_lock_free(&q->use_lock); if (copy_to_user(arg, &info, sizeof(info))) @@ -1636,7 +1636,7 @@ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, queuefree(q); return -EPERM; } - strlcpy(q->name, info.name, sizeof(q->name)); + strscpy(q->name, info.name, sizeof(q->name)); queuefree(q); return 0; diff --git a/sound/drivers/opl3/opl3_voice.h b/sound/drivers/opl3/opl3_voice.h index a371c075ac87..e26702559f61 100644 --- a/sound/drivers/opl3/opl3_voice.h +++ b/sound/drivers/opl3/opl3_voice.h @@ -41,7 +41,7 @@ void snd_opl3_timer_func(unsigned long data); /* Prototypes for opl3_drums.c */ void snd_opl3_load_drums(struct snd_opl3 *opl3); -void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int on_off, int vel, struct snd_midi_channel *chan); +void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int vel, int on_off, struct snd_midi_channel *chan); /* Prototypes for opl3_oss.c */ #ifdef CONFIG_SND_SEQUENCER_OSS diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index 091290d1f3ea..3a0361458597 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -382,7 +382,7 @@ static const struct ieee1394_device_id bebob_id_table[] = { /* Apogee Electronics, DA/AD/DD-16X (X-FireWire card) */ SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00010048, &spec_normal), /* Apogee Electronics, Ensemble */ - SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x00001eee, &spec_normal), + SND_BEBOB_DEV_ENTRY(VEN_APOGEE, 0x01eeee, &spec_normal), /* ESI, Quatafire610 */ SND_BEBOB_DEV_ENTRY(VEN_ESI, 0x00010064, &spec_normal), /* AcousticReality, eARMasterOne */ @@ -422,7 +422,19 @@ static const struct ieee1394_device_id bebob_id_table[] = { /* Focusrite, SaffirePro 26 I/O */ SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, 0x00000003, &saffirepro_26_spec), /* Focusrite, SaffirePro 10 I/O */ - SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, 0x00000006, &saffirepro_10_spec), + { + // The combination of vendor_id and model_id is the same as the + // same as the one of Liquid Saffire 56. + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_MODEL_ID | + IEEE1394_MATCH_SPECIFIER_ID | + IEEE1394_MATCH_VERSION, + .vendor_id = VEN_FOCUSRITE, + .model_id = 0x000006, + .specifier_id = 0x00a02d, + .version = 0x010001, + .driver_data = (kernel_ulong_t)&saffirepro_10_spec, + }, /* Focusrite, Saffire(no label and LE) */ SND_BEBOB_DEV_ENTRY(VEN_FOCUSRITE, MODEL_FOCUSRITE_SAFFIRE_BOTH, &saffire_spec), diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index b8e2391c33ff..0c7fe1418447 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -111,6 +111,10 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) /* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); + if (!acard->fm_res) { + err = -EBUSY; + goto _err; + } if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev], diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 69f76ff5693d..718d5e3b7806 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -785,6 +785,9 @@ wavefront_send_patch (snd_wavefront_t *dev, wavefront_patch_info *header) DPRINT (WF_DEBUG_LOAD_PATCH, "downloading patch %d\n", header->number); + if (header->number >= ARRAY_SIZE(dev->patch_status)) + return -EINVAL; + dev->patch_status[header->number] |= WF_SLOT_FILLED; bptr = buf; @@ -809,6 +812,9 @@ wavefront_send_program (snd_wavefront_t *dev, wavefront_patch_info *header) DPRINT (WF_DEBUG_LOAD_PATCH, "downloading program %d\n", header->number); + if (header->number >= ARRAY_SIZE(dev->prog_status)) + return -EINVAL; + dev->prog_status[header->number] = WF_SLOT_USED; /* XXX need to zero existing SLOT_USED bit for program_status[i] @@ -898,6 +904,9 @@ wavefront_send_sample (snd_wavefront_t *dev, header->number = x; } + if (header->number >= WF_MAX_SAMPLE) + return -EINVAL; + if (header->size) { /* XXX it's a debatable point whether or not RDONLY semantics diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c index d2951ed4bf71..1984291ebd07 100644 --- a/sound/pci/cs46xx/dsp_spos.c +++ b/sound/pci/cs46xx/dsp_spos.c @@ -899,6 +899,9 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip) struct dsp_spos_instance * ins = chip->dsp_spos_instance; int i; + if (!ins) + return 0; + snd_info_free_entry(ins->proc_sym_info_entry); ins->proc_sym_info_entry = NULL; diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 286f5e3686a3..d73ee11a32bd 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1953,6 +1953,11 @@ static int snd_echo_create(struct snd_card *card, } chip->dsp_registers = (volatile u32 __iomem *) ioremap_nocache(chip->dsp_registers_phys, sz); + if (!chip->dsp_registers) { + dev_err(chip->card->dev, "ioremap failed\n"); + snd_echo_free(chip); + return -ENOMEM; + } if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 50b216fc369f..5d422d65e62b 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1000,6 +1001,8 @@ static int snd_emu10k1_ipcm_poke(struct snd_emu10k1 *emu, if (ipcm->substream >= EMU10K1_FX8010_PCM_COUNT) return -EINVAL; + ipcm->substream = array_index_nospec(ipcm->substream, + EMU10K1_FX8010_PCM_COUNT); if (ipcm->channels > 32) return -EINVAL; pcm = &emu->fx8010.pcm[ipcm->substream]; @@ -1046,6 +1049,8 @@ static int snd_emu10k1_ipcm_peek(struct snd_emu10k1 *emu, if (ipcm->substream >= EMU10K1_FX8010_PCM_COUNT) return -EINVAL; + ipcm->substream = array_index_nospec(ipcm->substream, + EMU10K1_FX8010_PCM_COUNT); pcm = &emu->fx8010.pcm[ipcm->substream]; mutex_lock(&emu->fx8010.lock); spin_lock_irq(&emu->reg_lock); diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 6efadbfb3fe3..7ea201c05e5d 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -109,7 +109,8 @@ static int hda_codec_driver_probe(struct device *dev) err = snd_hda_codec_build_controls(codec); if (err < 0) goto error_module; - if (codec->card->registered) { + /* only register after the bus probe finished; otherwise it's racy */ + if (!codec->bus->bus_probing && codec->card->registered) { err = snd_card_register(codec->card); if (err < 0) goto error_module; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index f6d4a1046e54..ad0b23a21bc8 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3004,6 +3004,7 @@ static void hda_call_codec_resume(struct hda_codec *codec) hda_jackpoll_work(&codec->jackpoll_work.work); else snd_hda_jack_report_sync(codec); + codec->core.dev.power.power_state = PMSG_ON; atomic_dec(&codec->core.in_pm); } @@ -3036,10 +3037,62 @@ static int hda_codec_runtime_resume(struct device *dev) } #endif /* CONFIG_PM */ +#ifdef CONFIG_PM_SLEEP +static int hda_codec_force_resume(struct device *dev) +{ + int ret; + + /* The get/put pair below enforces the runtime resume even if the + * device hasn't been used at suspend time. This trick is needed to + * update the jack state change during the sleep. + */ + pm_runtime_get_noresume(dev); + ret = pm_runtime_force_resume(dev); + pm_runtime_put(dev); + return ret; +} + +static int hda_codec_pm_suspend(struct device *dev) +{ + dev->power.power_state = PMSG_SUSPEND; + return pm_runtime_force_suspend(dev); +} + +static int hda_codec_pm_resume(struct device *dev) +{ + dev->power.power_state = PMSG_RESUME; + return hda_codec_force_resume(dev); +} + +static int hda_codec_pm_freeze(struct device *dev) +{ + dev->power.power_state = PMSG_FREEZE; + return pm_runtime_force_suspend(dev); +} + +static int hda_codec_pm_thaw(struct device *dev) +{ + dev->power.power_state = PMSG_THAW; + return hda_codec_force_resume(dev); +} + +static int hda_codec_pm_restore(struct device *dev) +{ + dev->power.power_state = PMSG_RESTORE; + return hda_codec_force_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + /* referred in hda_bind.c */ const struct dev_pm_ops hda_codec_driver_pm = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) +#ifdef CONFIG_PM_SLEEP + .suspend = hda_codec_pm_suspend, + .resume = hda_codec_pm_resume, + .freeze = hda_codec_pm_freeze, + .thaw = hda_codec_pm_thaw, + .poweroff = hda_codec_pm_suspend, + .restore = hda_codec_pm_restore, +#endif /* CONFIG_PM_SLEEP */ SET_RUNTIME_PM_OPS(hda_codec_runtime_suspend, hda_codec_runtime_resume, NULL) }; diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 776dffa88aee..171e11be938d 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -68,6 +68,7 @@ struct hda_bus { unsigned int response_reset:1; /* controller was reset */ unsigned int in_reset:1; /* during reset operation */ unsigned int no_response_fallback:1; /* don't fallback at RIRB error */ + unsigned int bus_probing :1; /* during probing process */ int primary_dig_out_type; /* primary digital out PCM type */ unsigned int mixer_assigned; /* codec addr for mixer name */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index f964743b104c..74c9600876d6 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2100,6 +2100,7 @@ static int azx_probe_continue(struct azx *chip) int val; int err; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; /* Request display power well for the HDA controller or codec. For @@ -2200,6 +2201,7 @@ i915_power_fail: if (err < 0) hda->init_failed = 1; complete_all(&hda->probe_wait); + to_hda_bus(bus)->bus_probing = 0; return err; } diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 17fd81736d3d..039fbbb1e53c 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -249,10 +249,12 @@ static int hda_tegra_suspend(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; struct hda_tegra *hda = container_of(chip, struct hda_tegra, chip); + struct hdac_bus *bus = azx_bus(chip); snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); azx_stop_chip(chip); + synchronize_irq(bus->irq); azx_enter_link_reset(chip); hda_tegra_disable_clocks(hda); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index aea3cc2abe3a..40dd46556452 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -853,6 +853,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0467e5ba82e0..5d8ac2d798df 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4792,6 +4792,13 @@ static void alc280_fixup_hp_9480m(struct hda_codec *codec, } } +static void alc_fixup_disable_mic_vref(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -4891,6 +4898,7 @@ enum { ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC295_FIXUP_DISABLE_DAC3, ALC280_FIXUP_HP_HEADSET_MIC, @@ -5546,6 +5554,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC225_FIXUP_DISABLE_MIC_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_mic_vref, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -5555,7 +5569,7 @@ static const struct hda_fixup alc269_fixups[] = { {} }, .chained = true, - .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + .chain_id = ALC225_FIXUP_DISABLE_MIC_VREF }, [ALC280_FIXUP_HP_HEADSET_MIC] = { .type = HDA_FIXUP_FUNC, diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 7c8941b8b2de..dd6c9e6a1d53 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -4065,15 +4066,16 @@ static int snd_hdsp_channel_info(struct snd_pcm_substream *substream, struct snd_pcm_channel_info *info) { struct hdsp *hdsp = snd_pcm_substream_chip(substream); - int mapped_channel; + unsigned int channel = info->channel; - if (snd_BUG_ON(info->channel >= hdsp->max_channels)) + if (snd_BUG_ON(channel >= hdsp->max_channels)) + return -EINVAL; + channel = array_index_nospec(channel, hdsp->max_channels); + + if (hdsp->channel_map[channel] < 0) return -EINVAL; - if ((mapped_channel = hdsp->channel_map[info->channel]) < 0) - return -EINVAL; - - info->offset = mapped_channel * HDSP_CHANNEL_BUFFER_BYTES; + info->offset = hdsp->channel_map[channel] * HDSP_CHANNEL_BUFFER_BYTES; info->first = 0; info->step = 32; return 0; diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 14dfdee05fd5..3066e068aae5 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -219,7 +219,7 @@ config SND_SOC_PHYCORE_AC97 config SND_SOC_EUKREA_TLV320 tristate "Eukrea TLV320" - depends on ARCH_MXC && I2C + depends on ARCH_MXC && !ARM64 && I2C select SND_SOC_TLV320AIC23_I2C select SND_SOC_IMX_AUDMUX select SND_SOC_IMX_SSI diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 1b05d1c5d9fd..a32fe14b4687 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -659,6 +659,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) asrc_fail: of_node_put(asrc_np); of_node_put(codec_np); + put_device(&cpu_pdev->dev); fail: of_node_put(cpu_np); diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index e8adead8be00..40075b9afb79 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -57,6 +57,8 @@ struct fsl_esai { u32 fifo_depth; u32 slot_width; u32 slots; + u32 tx_mask; + u32 rx_mask; u32 hck_rate[2]; u32 sck_rate[2]; bool hck_dir[2]; @@ -357,21 +359,13 @@ static int fsl_esai_set_dai_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask)); - esai_priv->slot_width = slot_width; esai_priv->slots = slots; + esai_priv->tx_mask = tx_mask; + esai_priv->rx_mask = rx_mask; return 0; } @@ -394,7 +388,8 @@ static int fsl_esai_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) break; case SND_SOC_DAIFMT_RIGHT_J: /* Data on rising edge of bclk, frame high, right aligned */ - xccr |= ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP | ESAI_xCR_xWA; + xccr |= ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP; + xcr |= ESAI_xCR_xWA; break; case SND_SOC_DAIFMT_DSP_A: /* Data on rising edge of bclk, frame high, 1clk before data */ @@ -451,12 +446,12 @@ static int fsl_esai_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - mask = ESAI_xCR_xFSL | ESAI_xCR_xFSR; + mask = ESAI_xCR_xFSL | ESAI_xCR_xFSR | ESAI_xCR_xWA; regmap_update_bits(esai_priv->regmap, REG_ESAI_TCR, mask, xcr); regmap_update_bits(esai_priv->regmap, REG_ESAI_RCR, mask, xcr); mask = ESAI_xCCR_xCKP | ESAI_xCCR_xHCKP | ESAI_xCCR_xFSP | - ESAI_xCCR_xFSD | ESAI_xCCR_xCKD | ESAI_xCR_xWA; + ESAI_xCCR_xFSD | ESAI_xCCR_xCKD; regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR, mask, xccr); regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR, mask, xccr); @@ -581,6 +576,7 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; u8 i, channels = substream->runtime->channels; u32 pins = DIV_ROUND_UP(channels, esai_priv->slots); + u32 mask; switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -593,15 +589,38 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, for (i = 0; tx && i < channels; i++) regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0); + /* + * When set the TE/RE in the end of enablement flow, there + * will be channel swap issue for multi data line case. + * In order to workaround this issue, we switch the bit + * enablement sequence to below sequence + * 1) clear the xSMB & xSMA: which is done in probe and + * stop state. + * 2) set TE/RE + * 3) set xSMB + * 4) set xSMA: xSMA is the last one in this flow, which + * will trigger esai to start. + */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins)); + mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask; + + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask)); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask)); + break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, 0); /* Disable and reset FIFO */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx), @@ -886,6 +905,15 @@ static int fsl_esai_probe(struct platform_device *pdev) return ret; } + esai_priv->tx_mask = 0xFFFFFFFF; + esai_priv->rx_mask = 0xFFFFFFFF; + + /* Clear the TSMA, TSMB, RSMA, RSMB */ + regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component, &fsl_esai_dai, 1); if (ret) { diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index fc57da341d61..136df38c4536 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -86,49 +86,49 @@ static ssize_t audmux_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", + ret = scnprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", pdcr, ptcr); if (ptcr & IMX_AUDMUX_V2_PTCR_TFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS output from %s, ", audmux_port_string((ptcr >> 27) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_TCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk output from %s", audmux_port_string((ptcr >> 22) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk input"); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (ptcr & IMX_AUDMUX_V2_PTCR_SYN) { - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Port is symmetric"); } else { if (ptcr & IMX_AUDMUX_V2_PTCR_RFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS output from %s, ", audmux_port_string((ptcr >> 17) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_RCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk output from %s", audmux_port_string((ptcr >> 12) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk input"); } - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\nData received from %s\n", audmux_port_string((pdcr >> 13) & 0x7)); diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index b99e0b5e00e9..8e525f7ac08d 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -115,6 +115,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) ret = -EPROBE_DEFER; goto fail; } + put_device(&ssi_pdev->dev); codec_dev = of_find_i2c_device_by_node(codec_np); if (!codec_dev) { dev_err(&pdev->dev, "failed to find codec platform device\n"); diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 2b96b11fbe71..1d9dfb92b3b4 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -398,7 +398,13 @@ static int sst_media_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); + int ret; + + ret = + snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(params)); + if (ret) + return ret; memset(substream->runtime->dma_area, 0, params_buffer_bytes(params)); return 0; } diff --git a/sound/soc/intel/atom/sst/sst_loader.c b/sound/soc/intel/atom/sst/sst_loader.c index 33917146d9c4..054b1d514e8a 100644 --- a/sound/soc/intel/atom/sst/sst_loader.c +++ b/sound/soc/intel/atom/sst/sst_loader.c @@ -354,14 +354,14 @@ static int sst_request_fw(struct intel_sst_drv *sst) const struct firmware *fw; retval = request_firmware(&fw, sst->firmware_name, sst->dev); - if (fw == NULL) { - dev_err(sst->dev, "fw is returning as null\n"); - return -EINVAL; - } if (retval) { dev_err(sst->dev, "request fw failed %d\n", retval); return retval; } + if (fw == NULL) { + dev_err(sst->dev, "fw is returning as null\n"); + return -EINVAL; + } mutex_lock(&sst->sst_lock); retval = sst_cache_and_parse_fw(sst, fw); mutex_unlock(&sst->sst_lock); diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c index 3f8a1e10bed0..e5ca41ffa890 100644 --- a/sound/soc/intel/boards/broadwell.c +++ b/sound/soc/intel/boards/broadwell.c @@ -191,7 +191,7 @@ static struct snd_soc_dai_link broadwell_rt286_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index 22558572cb9c..de955c2e8c4e 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -145,7 +145,7 @@ static struct snd_soc_dai_link haswell_rt5640_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c index 09db2aec12a3..776e809a8aab 100644 --- a/sound/soc/omap/omap-dmic.c +++ b/sound/soc/omap/omap-dmic.c @@ -48,6 +48,8 @@ struct omap_dmic { struct device *dev; void __iomem *io_base; struct clk *fclk; + struct pm_qos_request pm_qos_req; + int latency; int fclk_freq; int out_freq; int clk_div; @@ -124,6 +126,8 @@ static void omap_dmic_dai_shutdown(struct snd_pcm_substream *substream, mutex_lock(&dmic->mutex); + pm_qos_remove_request(&dmic->pm_qos_req); + if (!dai->active) dmic->active = 0; @@ -226,6 +230,8 @@ static int omap_dmic_dai_hw_params(struct snd_pcm_substream *substream, /* packet size is threshold * channels */ dma_data = snd_soc_dai_get_dma_data(dai, substream); dma_data->maxburst = dmic->threshold * channels; + dmic->latency = (OMAP_DMIC_THRES_MAX - dmic->threshold) * USEC_PER_SEC / + params_rate(params); return 0; } @@ -236,6 +242,9 @@ static int omap_dmic_dai_prepare(struct snd_pcm_substream *substream, struct omap_dmic *dmic = snd_soc_dai_get_drvdata(dai); u32 ctrl; + if (pm_qos_request_active(&dmic->pm_qos_req)) + pm_qos_update_request(&dmic->pm_qos_req, dmic->latency); + /* Configure uplink threshold */ omap_dmic_write(dmic, OMAP_DMIC_FIFO_CTRL_REG, dmic->threshold); diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 8d0d45d330e7..8eb2d12b6a34 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -54,6 +54,8 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; + struct pm_qos_request pm_qos_req; + int latency[2]; struct mutex mutex; @@ -273,6 +275,9 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; mutex_lock(&mcpdm->mutex); @@ -285,6 +290,14 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, } } + if (mcpdm->latency[stream2]) + pm_qos_update_request(&mcpdm->pm_qos_req, + mcpdm->latency[stream2]); + else if (mcpdm->latency[stream1]) + pm_qos_remove_request(&mcpdm->pm_qos_req); + + mcpdm->latency[stream1] = 0; + mutex_unlock(&mcpdm->mutex); } @@ -296,7 +309,7 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, int stream = substream->stream; struct snd_dmaengine_dai_dma_data *dma_data; u32 threshold; - int channels; + int channels, latency; int link_mask = 0; channels = params_channels(params); @@ -336,14 +349,25 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, dma_data->maxburst = (MCPDM_DN_THRES_MAX - threshold) * channels; + latency = threshold; } else { /* If playback is not running assume a stereo stream to come */ if (!mcpdm->config[!stream].link_mask) mcpdm->config[!stream].link_mask = (0x3 << 3); dma_data->maxburst = threshold * channels; + latency = (MCPDM_DN_THRES_MAX - threshold); } + /* + * The DMA must act to a DMA request within latency time (usec) to avoid + * under/overflow + */ + mcpdm->latency[stream] = latency * USEC_PER_SEC / params_rate(params); + + if (!mcpdm->latency[stream]) + mcpdm->latency[stream] = 10; + /* Check if we need to restart McPDM with this stream */ if (mcpdm->config[stream].link_mask && mcpdm->config[stream].link_mask != link_mask) @@ -358,6 +382,20 @@ static int omap_mcpdm_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + struct pm_qos_request *pm_qos_req = &mcpdm->pm_qos_req; + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + int latency = mcpdm->latency[stream2]; + + /* Prevent omap hardware from hitting off between FIFO fills */ + if (!latency || mcpdm->latency[stream1] < latency) + latency = mcpdm->latency[stream1]; + + if (pm_qos_request_active(pm_qos_req)) + pm_qos_update_request(pm_qos_req, latency); + else if (latency) + pm_qos_add_request(pm_qos_req, PM_QOS_CPU_DMA_LATENCY, latency); if (!omap_mcpdm_active(mcpdm)) { omap_mcpdm_start(mcpdm); @@ -419,6 +457,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); + if (pm_qos_request_active(&mcpdm->pm_qos_req)) + pm_qos_remove_request(&mcpdm->pm_qos_req); + return 0; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 7e8bf3aa73c7..359037f3d606 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1723,6 +1723,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) } card->instantiated = 1; + dapm_mark_endpoints_dirty(card); snd_soc_dapm_sync(&card->dapm); mutex_unlock(&card->mutex); mutex_unlock(&client_mutex); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 0aefed8ab0cf..7e26d173da41 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1943,19 +1943,19 @@ static ssize_t dapm_widget_power_read_file(struct file *file, out = is_connected_output_ep(w, NULL); } - ret = snprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", + ret = scnprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", w->name, w->power ? "On" : "Off", w->force ? " (forced)" : "", in, out); if (w->reg >= 0) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " - R%d(0x%x) mask 0x%x", w->reg, w->reg, w->mask << w->shift); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (w->sname) - ret += snprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", w->sname, w->active ? "active" : "inactive"); @@ -1968,7 +1968,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!p->connect) continue; - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %s \"%s\" \"%s\"\n", (rdir == SND_SOC_DAPM_DIR_IN) ? "in" : "out", p->name ? p->name : "static", diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c1e76feb3529..824f4d7fc41f 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1770,6 +1770,7 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, struct snd_soc_tplg_ops *ops, const struct firmware *fw, u32 id) { struct soc_tplg tplg; + int ret; /* setup parsing context */ memset(&tplg, 0, sizeof(tplg)); @@ -1783,7 +1784,12 @@ int snd_soc_tplg_component_load(struct snd_soc_component *comp, tplg.bytes_ext_ops = ops->bytes_ext_ops; tplg.bytes_ext_ops_count = ops->bytes_ext_ops_count; - return soc_tplg_load(&tplg); + ret = soc_tplg_load(&tplg); + /* free the created components if fail to load topology */ + if (ret) + snd_soc_tplg_component_remove(comp, SND_SOC_TPLG_INDEX_ALL); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load); diff --git a/sound/synth/emux/emux_hwdep.c b/sound/synth/emux/emux_hwdep.c index e557946718a9..d9fcae071b47 100644 --- a/sound/synth/emux/emux_hwdep.c +++ b/sound/synth/emux/emux_hwdep.c @@ -22,9 +22,9 @@ #include #include #include +#include #include "emux_voice.h" - #define TMP_CLIENT_ID 0x1001 /* @@ -66,13 +66,16 @@ snd_emux_hwdep_misc_mode(struct snd_emux *emu, void __user *arg) return -EFAULT; if (info.mode < 0 || info.mode >= EMUX_MD_END) return -EINVAL; + info.mode = array_index_nospec(info.mode, EMUX_MD_END); if (info.port < 0) { for (i = 0; i < emu->num_ports; i++) emu->portptrs[i]->ctrls[info.mode] = info.value; } else { - if (info.port < emu->num_ports) + if (info.port < emu->num_ports) { + info.port = array_index_nospec(info.port, emu->num_ports); emu->portptrs[info.port]->ctrls[info.mode] = info.value; + } } return 0; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 97d6a18e6956..f7eb0d2f797b 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1816,7 +1816,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, char *name) { struct uac_processing_unit_descriptor *desc = raw_desc; - int num_ins = desc->bNrInPins; + int num_ins; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; int i, err, nameid, type, len; @@ -1831,7 +1831,13 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, 0, NULL, default_value_info }; - if (desc->bLength < 13 || desc->bLength < 13 + num_ins || + if (desc->bLength < 13) { + usb_audio_err(state->chip, "invalid %s descriptor (id %d)\n", name, unitid); + return -EINVAL; + } + + num_ins = desc->bNrInPins; + if (desc->bLength < 13 + num_ins || desc->bLength < num_ins + uac_processing_unit_bControlSize(desc, state->mixer->protocol)) { usb_audio_err(state->chip, "invalid %s descriptor (id %d)\n", name, unitid); return -EINVAL; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 91fb8a8d9cef..cfb4fc54d8af 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -314,6 +314,9 @@ static int search_roland_implicit_fb(struct usb_device *dev, int ifnum, return 0; } +/* Setup an implicit feedback endpoint from a quirk. Returns 0 if no quirk + * applies. Returns 1 if a quirk was found. + */ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, struct usb_device *dev, struct usb_interface_descriptor *altsd, @@ -382,7 +385,7 @@ add_sync_ep: subs->data_endpoint->sync_master = subs->sync_endpoint; - return 0; + return 1; } static int set_sync_endpoint(struct snd_usb_substream *subs, @@ -421,6 +424,10 @@ static int set_sync_endpoint(struct snd_usb_substream *subs, if (err < 0) return err; + /* endpoint set by quirk */ + if (err > 0) + return 0; + if (altsd->bNumEndpoints < 2) return 0; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 15cbe2565703..d32727c74a16 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3321,6 +3321,9 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, + { + .ifnum = -1 + }, } } }, diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h new file mode 100644 index 000000000000..a5fa3195a230 --- /dev/null +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -0,0 +1,336 @@ +#ifndef _ASM_X86_CPUFEATURES_H +#define _ASM_X86_CPUFEATURES_H + +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#include +#endif + +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include +#endif + +/* + * Defines x86 CPU feature bits + */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NBUGINTS 1 /* N 32-bit bug flags */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! + */ + +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ + +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ + +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ + +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ + +/* Virtualization flags: Linux defined, word 8 */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ + + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ + +/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ + +/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ + +/* + * BUG word(s) + */ +#define X86_BUG(x) (NCAPINTS*32 + (x)) + +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ + +#endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h new file mode 100644 index 000000000000..1f8cca459c6c --- /dev/null +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -0,0 +1,65 @@ +#ifndef _ASM_X86_DISABLED_FEATURES_H +#define _ASM_X86_DISABLED_FEATURES_H + +/* These features, although they might be available in a CPU + * will not be used because the compile options to support + * them are not present. + * + * This code allows them to be checked and disabled at + * compile time without an explicit #ifdef. Use + * cpu_feature_enabled(). + */ + +#ifdef CONFIG_X86_INTEL_MPX +# define DISABLE_MPX 0 +#else +# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) +#endif + +#ifdef CONFIG_X86_64 +# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) +# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) +# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) +# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 +#else +# define DISABLE_VME 0 +# define DISABLE_K6_MTRR 0 +# define DISABLE_CYRIX_ARR 0 +# define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +# define DISABLE_PKU 0 +# define DISABLE_OSPKE 0 +#else +# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) +# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) +#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ + +/* + * Make sure to add features to the correct mask + */ +#define DISABLED_MASK0 (DISABLE_VME) +#define DISABLED_MASK1 0 +#define DISABLED_MASK2 0 +#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) +#define DISABLED_MASK4 (DISABLE_PCID) +#define DISABLED_MASK5 0 +#define DISABLED_MASK6 0 +#define DISABLED_MASK7 0 +#define DISABLED_MASK8 0 +#define DISABLED_MASK9 (DISABLE_MPX) +#define DISABLED_MASK10 0 +#define DISABLED_MASK11 0 +#define DISABLED_MASK12 0 +#define DISABLED_MASK13 0 +#define DISABLED_MASK14 0 +#define DISABLED_MASK15 0 +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) +#define DISABLED_MASK17 0 +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) + +#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h new file mode 100644 index 000000000000..6847d85400a8 --- /dev/null +++ b/tools/arch/x86/include/asm/required-features.h @@ -0,0 +1,106 @@ +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#define _ASM_X86_REQUIRED_FEATURES_H + +/* Define minimum CPUID feature set for kernel These bits are checked + really early to actually display a visible error message before the + kernel dies. Make sure to assign features to the proper mask! + + Some requirements that are not in CPUID yet are also in the + CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. + + The real information is in arch/x86/Kconfig.cpu, this just converts + the CONFIGs into a bitmask */ + +#ifndef CONFIG_MATH_EMULATION +# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) +#else +# define NEED_FPU 0 +#endif + +#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) +# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) +#else +# define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else +# define NEED_CX8 0 +#endif + +#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) +# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) +#else +# define NEED_CMOV 0 +#endif + +#ifdef CONFIG_X86_USE_3DNOW +# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) +#else +# define NEED_3DNOW 0 +#endif + +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + +#ifdef CONFIG_MATOM +# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) +#else +# define NEED_MOVBE 0 +#endif + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT +/* Paravirtualized systems may not have PSE or PGE available */ +#define NEED_PSE 0 +#define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) +#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) +#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) +#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) +#define NEED_LM (1<<(X86_FEATURE_LM & 31)) +#else +#define NEED_PSE 0 +#define NEED_MSR 0 +#define NEED_PGE 0 +#define NEED_FXSR 0 +#define NEED_XMM 0 +#define NEED_XMM2 0 +#define NEED_LM 0 +#endif + +#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ + NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ + NEED_XMM|NEED_XMM2) +#define SSE_MASK (NEED_XMM|NEED_XMM2) + +#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) + +#define REQUIRED_MASK2 0 +#define REQUIRED_MASK3 (NEED_NOPL) +#define REQUIRED_MASK4 (NEED_MOVBE) +#define REQUIRED_MASK5 0 +#define REQUIRED_MASK6 0 +#define REQUIRED_MASK7 0 +#define REQUIRED_MASK8 0 +#define REQUIRED_MASK9 0 +#define REQUIRED_MASK10 0 +#define REQUIRED_MASK11 0 +#define REQUIRED_MASK12 0 +#define REQUIRED_MASK13 0 +#define REQUIRED_MASK14 0 +#define REQUIRED_MASK15 0 +#define REQUIRED_MASK16 0 +#define REQUIRED_MASK17 0 +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) + +#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S new file mode 100644 index 000000000000..a0de849435ad --- /dev/null +++ b/tools/arch/x86/lib/memcpy_64.S @@ -0,0 +1,179 @@ +/* Copyright 2002 Andi Kleen */ + +#include +#include +#include + +/* + * We build a jump to memcpy_orig by default which gets NOPped out on + * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which + * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs + * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. + */ + +.weak memcpy + +/* + * memcpy - Copy a memory block. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * rax original destination + */ +ENTRY(__memcpy) +ENTRY(memcpy) + ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ + "jmp memcpy_erms", X86_FEATURE_ERMS + + movq %rdi, %rax + movq %rdx, %rcx + shrq $3, %rcx + andl $7, %edx + rep movsq + movl %edx, %ecx + rep movsb + ret +ENDPROC(memcpy) +ENDPROC(__memcpy) + +/* + * memcpy_erms() - enhanced fast string memcpy. This is faster and + * simpler than memcpy. Use memcpy_erms when possible. + */ +ENTRY(memcpy_erms) + movq %rdi, %rax + movq %rdx, %rcx + rep movsb + ret +ENDPROC(memcpy_erms) + +ENTRY(memcpy_orig) + movq %rdi, %rax + + cmpq $0x20, %rdx + jb .Lhandle_tail + + /* + * We check whether memory false dependence could occur, + * then jump to corresponding copy mode. + */ + cmp %dil, %sil + jl .Lcopy_backward + subq $0x20, %rdx +.Lcopy_forward_loop: + subq $0x20, %rdx + + /* + * Move in blocks of 4x8 bytes: + */ + movq 0*8(%rsi), %r8 + movq 1*8(%rsi), %r9 + movq 2*8(%rsi), %r10 + movq 3*8(%rsi), %r11 + leaq 4*8(%rsi), %rsi + + movq %r8, 0*8(%rdi) + movq %r9, 1*8(%rdi) + movq %r10, 2*8(%rdi) + movq %r11, 3*8(%rdi) + leaq 4*8(%rdi), %rdi + jae .Lcopy_forward_loop + addl $0x20, %edx + jmp .Lhandle_tail + +.Lcopy_backward: + /* + * Calculate copy position to tail. + */ + addq %rdx, %rsi + addq %rdx, %rdi + subq $0x20, %rdx + /* + * At most 3 ALU operations in one cycle, + * so append NOPS in the same 16 bytes trunk. + */ + .p2align 4 +.Lcopy_backward_loop: + subq $0x20, %rdx + movq -1*8(%rsi), %r8 + movq -2*8(%rsi), %r9 + movq -3*8(%rsi), %r10 + movq -4*8(%rsi), %r11 + leaq -4*8(%rsi), %rsi + movq %r8, -1*8(%rdi) + movq %r9, -2*8(%rdi) + movq %r10, -3*8(%rdi) + movq %r11, -4*8(%rdi) + leaq -4*8(%rdi), %rdi + jae .Lcopy_backward_loop + + /* + * Calculate copy position to head. + */ + addl $0x20, %edx + subq %rdx, %rsi + subq %rdx, %rdi +.Lhandle_tail: + cmpl $16, %edx + jb .Lless_16bytes + + /* + * Move data from 16 bytes to 31 bytes. + */ + movq 0*8(%rsi), %r8 + movq 1*8(%rsi), %r9 + movq -2*8(%rsi, %rdx), %r10 + movq -1*8(%rsi, %rdx), %r11 + movq %r8, 0*8(%rdi) + movq %r9, 1*8(%rdi) + movq %r10, -2*8(%rdi, %rdx) + movq %r11, -1*8(%rdi, %rdx) + retq + .p2align 4 +.Lless_16bytes: + cmpl $8, %edx + jb .Lless_8bytes + /* + * Move data from 8 bytes to 15 bytes. + */ + movq 0*8(%rsi), %r8 + movq -1*8(%rsi, %rdx), %r9 + movq %r8, 0*8(%rdi) + movq %r9, -1*8(%rdi, %rdx) + retq + .p2align 4 +.Lless_8bytes: + cmpl $4, %edx + jb .Lless_3bytes + + /* + * Move data from 4 bytes to 7 bytes. + */ + movl (%rsi), %ecx + movl -4(%rsi, %rdx), %r8d + movl %ecx, (%rdi) + movl %r8d, -4(%rdi, %rdx) + retq + .p2align 4 +.Lless_3bytes: + subl $1, %edx + jb .Lend + /* + * Move data from 1 bytes to 3 bytes. + */ + movzbl (%rsi), %ecx + jz .Lstore_1byte + movzbq 1(%rsi), %r8 + movzbq (%rsi, %rdx), %r9 + movb %r8b, 1(%rdi) + movb %r9b, (%rdi, %rdx) +.Lstore_1byte: + movb %cl, (%rdi) + +.Lend: + retq +ENDPROC(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S new file mode 100644 index 000000000000..c9c81227ea37 --- /dev/null +++ b/tools/arch/x86/lib/memset_64.S @@ -0,0 +1,138 @@ +/* Copyright 2002 Andi Kleen, SuSE Labs */ + +#include +#include +#include + +.weak memset + +/* + * ISO C memset - set a memory block to a byte value. This function uses fast + * string to get better performance than the original function. The code is + * simpler and shorter than the orignal function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ +ENTRY(memset) +ENTRY(__memset) + /* + * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended + * to use it when possible. If not available, use fast string instructions. + * + * Otherwise, use original memset function. + */ + ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ + "jmp memset_erms", X86_FEATURE_ERMS + + movq %rdi,%r9 + movq %rdx,%rcx + andl $7,%edx + shrq $3,%rcx + /* expand byte value */ + movzbl %sil,%esi + movabs $0x0101010101010101,%rax + imulq %rsi,%rax + rep stosq + movl %edx,%ecx + rep stosb + movq %r9,%rax + ret +ENDPROC(memset) +ENDPROC(__memset) + +/* + * ISO C memset - set a memory block to a byte value. This function uses + * enhanced rep stosb to override the fast string function. + * The code is simpler and shorter than the fast string function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ +ENTRY(memset_erms) + movq %rdi,%r9 + movb %sil,%al + movq %rdx,%rcx + rep stosb + movq %r9,%rax + ret +ENDPROC(memset_erms) + +ENTRY(memset_orig) + movq %rdi,%r10 + + /* expand byte value */ + movzbl %sil,%ecx + movabs $0x0101010101010101,%rax + imulq %rcx,%rax + + /* align dst */ + movl %edi,%r9d + andl $7,%r9d + jnz .Lbad_alignment +.Lafter_bad_alignment: + + movq %rdx,%rcx + shrq $6,%rcx + jz .Lhandle_tail + + .p2align 4 +.Lloop_64: + decq %rcx + movq %rax,(%rdi) + movq %rax,8(%rdi) + movq %rax,16(%rdi) + movq %rax,24(%rdi) + movq %rax,32(%rdi) + movq %rax,40(%rdi) + movq %rax,48(%rdi) + movq %rax,56(%rdi) + leaq 64(%rdi),%rdi + jnz .Lloop_64 + + /* Handle tail in loops. The loops should be faster than hard + to predict jump tables. */ + .p2align 4 +.Lhandle_tail: + movl %edx,%ecx + andl $63&(~7),%ecx + jz .Lhandle_7 + shrl $3,%ecx + .p2align 4 +.Lloop_8: + decl %ecx + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + jnz .Lloop_8 + +.Lhandle_7: + andl $7,%edx + jz .Lende + .p2align 4 +.Lloop_1: + decl %edx + movb %al,(%rdi) + leaq 1(%rdi),%rdi + jnz .Lloop_1 + +.Lende: + movq %r10,%rax + ret + +.Lbad_alignment: + cmpq $7,%rdx + jbe .Lhandle_7 + movq %rax,(%rdi) /* unaligned store */ + movq $8,%r8 + subq %r9,%r8 + addq %r8,%rdi + subq %r8,%rdx + jmp .Lafter_bad_alignment +.Lfinal: +ENDPROC(memset_orig) diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h similarity index 66% rename from tools/perf/util/include/asm/alternative-asm.h rename to tools/include/asm/alternative-asm.h index 3a3a0f16456a..2a4d1bfa2988 100644 --- a/tools/perf/util/include/asm/alternative-asm.h +++ b/tools/include/asm/alternative-asm.h @@ -1,5 +1,5 @@ -#ifndef _PERF_ASM_ALTERNATIVE_ASM_H -#define _PERF_ASM_ALTERNATIVE_ASM_H +#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H +#define _TOOLS_ASM_ALTERNATIVE_ASM_H /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 6e4a10fe9dd0..743746a3c50d 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2419,7 +2419,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val) static char *arg_eval (struct print_arg *arg) { long long val; - static char buf[20]; + static char buf[24]; switch (arg->type) { case PRINT_ATOM: diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index eeb21eb43898..0b657702aede 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -11,6 +11,11 @@ tools/arch/sparc/include/asm/barrier_32.h tools/arch/sparc/include/asm/barrier_64.h tools/arch/tile/include/asm/barrier.h tools/arch/x86/include/asm/barrier.h +tools/arch/x86/include/asm/cpufeatures.h +tools/arch/x86/include/asm/disabled-features.h +tools/arch/x86/include/asm/required-features.h +tools/arch/x86/lib/memcpy_64.S +tools/arch/x86/lib/memset_64.S tools/arch/xtensa/include/asm/barrier.h tools/scripts tools/build @@ -25,6 +30,7 @@ tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c +tools/include/asm/alternative-asm.h tools/include/asm/atomic.h tools/include/asm/barrier.h tools/include/asm/bug.h @@ -66,8 +72,6 @@ include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h arch/*/include/uapi/asm/perf_regs.h -arch/*/lib/memcpy*.S -arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h include/uapi/linux/perf_event.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index fb1c9ddc3478..508f916569e1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -313,6 +313,21 @@ export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK include $(srctree)/tools/build/Makefile.include $(PERF_IN): prepare FORCE + @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ + (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/asm/required-features.h && ( \ + (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ + (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ + (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ + || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/lib/memset_64.S && ( \ + (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ + || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index de3965c4e4aa..15e8e0ec28a5 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -473,10 +473,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { int err; + char c; if (!evsel) return 0; + /* + * If supported, force pass-through config term (pt=1) even if user + * sets pt=0, which avoids senseless kernel errors. + */ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->attr.config |= 1; + } + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", "cyc_thresh", "caps/psb_cyc", evsel->attr.config); diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e668fad7..f97696a418cc 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -146,7 +146,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid) if (strstr(cpuid, "Intel")) { kvm->exit_reasons = vmx_exit_reasons; kvm->exit_reasons_isa = "VMX"; - } else if (strstr(cpuid, "AMD")) { + } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) { kvm->exit_reasons = svm_exit_reasons; kvm->exit_reasons_isa = "SVM"; } else diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index e4c2c30143b9..9d82c44a6d71 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,7 +1,7 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl -#include "../../../arch/x86/lib/memcpy_64.S" +#include "../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want * NOT executable stack. Otherwise the final linking will assume that diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index de278784c866..58407aa24c1b 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,7 +1,7 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#include "../../../arch/x86/lib/memset_64.S" +#include "../../arch/x86/lib/memset_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 47719bde34c6..4e64ba8163bb 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1320,8 +1320,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (symbol__init(NULL) < 0) - return -1; + status = symbol__init(NULL); + if (status < 0) + goto out_delete_evlist; sort__setup_elide(stdout); diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 790e413d9a1f..ea3161170f9e 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -16,7 +16,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | FIELD_IS_SIGNED); + is_signed = !!(field->flags & FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); @@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(void) return -1; } - if (perf_evsel__test_field(evsel, "prev_comm", 16, true)) + if (perf_evsel__test_field(evsel, "prev_comm", 16, false)) ret = -1; if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) @@ -54,7 +54,7 @@ int test__perf_evsel__tp_sched_test(void) if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) ret = -1; - if (perf_evsel__test_field(evsel, "next_comm", 16, true)) + if (perf_evsel__test_field(evsel, "next_comm", 16, false)) ret = -1; if (perf_evsel__test_field(evsel, "next_pid", 4, true)) @@ -72,7 +72,7 @@ int test__perf_evsel__tp_sched_test(void) return -1; } - if (perf_evsel__test_field(evsel, "comm", 16, true)) + if (perf_evsel__test_field(evsel, "comm", 16, false)) ret = -1; if (perf_evsel__test_field(evsel, "pid", 4, true)) @@ -84,5 +84,6 @@ int test__perf_evsel__tp_sched_test(void) if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 2006485a2859..3848d5ab378d 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -35,7 +35,7 @@ int test__openat_syscall_event_on_all_cpus(void) if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); - goto out_thread_map_delete; + goto out_cpu_map_delete; } if (perf_evsel__open(evsel, cpus, threads) < 0) { @@ -109,6 +109,8 @@ out_close_fd: perf_evsel__close_fd(evsel, 1, threads->nr); out_evsel_delete: perf_evsel__delete(evsel); +out_cpu_map_delete: + cpu_map__put(cpus); out_thread_map_delete: thread_map__put(threads); return err; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5053fac29f05..960de8951884 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1230,9 +1230,9 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, } /* padding must be written by fn() e.g. record__process_auxtrace() */ - padding = size & 7; + padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); if (padding) - padding = 8 - padding; + padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; memset(&ev, 0, sizeof(ev)); ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index adb53e7bcabf..f05a5c4738c0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -37,6 +37,9 @@ struct record_opts; struct auxtrace_info_event; struct events_stats; +/* Auxtrace records must have the same alignment as perf event records */ +#define PERF_AUXTRACE_RECORD_ALIGNMENT 8 + enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6523e1a8eea5..189f79e52c30 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -128,7 +128,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -175,8 +180,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8a84f82845f3..9aa10043103a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1093,6 +1093,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); assert(evsel->evlist == NULL); + perf_evsel__free_counts(evsel); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index dc17c881275d..62b38f2ff60d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -26,6 +26,7 @@ #include "../cache.h" #include "../util.h" +#include "../auxtrace.h" #include "intel-pt-insn-decoder.h" #include "intel-pt-pkt-decoder.h" @@ -237,19 +238,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / decoder->tsc_ctc_ratio_d; - - /* - * Allow for timestamps appearing to backwards because a TSC - * packet has slipped past a MTC packet, so allow 2 MTC ticks - * or ... - */ - decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, - decoder->tsc_ctc_ratio_n, - decoder->tsc_ctc_ratio_d); } - /* ... or 0x100 paranoia */ - if (decoder->tsc_slip < 0x100) - decoder->tsc_slip = 0x100; + + /* + * A TSC packet can slip past MTC packets so that the timestamp appears + * to go backwards. One estimate is that can be up to about 40 CPU + * cycles, which is certainly less than 0x1000 TSC ticks, but accept + * slippage an order of magnitude more to be on the safe side. + */ + decoder->tsc_slip = 0x10000; intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); @@ -1281,7 +1278,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; @@ -2321,6 +2317,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) } } +#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) + +/** + * adj_for_padding - adjust overlap to account for padding. + * @buf_b: second buffer + * @buf_a: first buffer + * @len_a: size of first buffer + * + * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap + * accordingly. + * + * Return: A pointer into @buf_b from where non-overlapped data starts + */ +static unsigned char *adj_for_padding(unsigned char *buf_b, + unsigned char *buf_a, size_t len_a) +{ + unsigned char *p = buf_b - MAX_PADDING; + unsigned char *q = buf_a + len_a - MAX_PADDING; + int i; + + for (i = MAX_PADDING; i; i--, p++, q++) { + if (*p != *q) + break; + } + + return p; +} + /** * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data * using TSC. @@ -2371,8 +2395,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, /* Same TSC, so buffers are consecutive */ if (!cmp && rem_b >= rem_a) { + unsigned char *start; + *consecutive = true; - return buf_b + len_b - (rem_b - rem_a); + start = buf_b + len_b - (rem_b - rem_a); + return adj_for_padding(start, buf_a, len_a); } if (cmp < 0) return buf_b; /* tsc_a < tsc_b => no overlap */ @@ -2435,7 +2462,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, found = memmem(buf_a, len_a, buf_b, len_a); if (found) { *consecutive = true; - return buf_b + len_a; + return adj_for_padding(buf_b + len_a, buf_a, len_a); } /* Try again at next PSB in buffer 'a' */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6e825dbaddea..4a7e0132294f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1936,7 +1936,7 @@ restart: if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else - strncpy(name, syms->symbol, MAX_NAME_LEN); + strlcpy(name, syms->symbol, MAX_NAME_LEN); evt_list[evt_i] = strdup(name); if (evt_list[evt_i] == NULL) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 593066c68e3d..4f650ebd564a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -100,7 +100,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * char path[PATH_MAX]; const char *lc; - snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -147,7 +147,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n ssize_t sret; int fd; - snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -177,7 +177,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -195,7 +195,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); fd = open(path, O_RDONLY); if (fd == -1) diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index eec6c1149f44..132878d4847a 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -333,7 +333,7 @@ static char *cpu_model(void) if (file) { while (fgets(buf, 255, file)) { if (strstr(buf, "model name")) { - strncpy(cpu_m, &buf[13], 255); + strlcpy(cpu_m, &buf[13], 255); break; } } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7c97ecaeae48..2070c02de3af 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -74,6 +74,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -98,7 +103,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 60edec383281..bf5ee8906fb2 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -41,13 +41,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - dso->long_name, -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33c79e415075..532e7bf06868 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3089,6 +3089,9 @@ int fork_it(char **argv) signal(SIGQUIT, SIG_IGN); if (waitpid(child_pid, &status, 0) == -1) err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); } /* * n.b. fork_it() does not check for errors from for_all_cpus() diff --git a/Documentation/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore similarity index 100% rename from Documentation/networking/timestamping/.gitignore rename to tools/testing/selftests/networking/timestamping/.gitignore diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile new file mode 100644 index 000000000000..ccbb9edbbbb9 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := hwtstamp_config timestamping txtimestamp + +all: $(TEST_PROGS) + +include ../../lib.mk + +clean: + rm -fr $(TEST_PROGS) diff --git a/Documentation/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c similarity index 100% rename from Documentation/networking/timestamping/hwtstamp_config.c rename to tools/testing/selftests/networking/timestamping/hwtstamp_config.c diff --git a/Documentation/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c similarity index 100% rename from Documentation/networking/timestamping/timestamping.c rename to tools/testing/selftests/networking/timestamping/timestamping.c diff --git a/Documentation/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c similarity index 100% rename from Documentation/networking/timestamping/txtimestamp.c rename to tools/testing/selftests/networking/timestamping/txtimestamp.c diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h new file mode 100644 index 000000000000..4f93af89ae16 --- /dev/null +++ b/tools/virtio/linux/dma-mapping.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_DMA_MAPPING_H +#define _LINUX_DMA_MAPPING_H + +#ifdef CONFIG_HAS_DMA +# error Virtio userspace code does not support CONFIG_HAS_DMA +#endif + +#define PCI_DMA_BUS_IS_PHYS 1 + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +#endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5d10f104f3eb..964df643509d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -821,7 +821,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_io_device *iodev = container_of(this, struct vgic_io_device, dev); - struct kvm_run *run = vcpu->run; const struct vgic_io_range *range; struct kvm_exit_mmio mmio; bool updated_state; @@ -850,12 +849,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, updated_state = false; } spin_unlock(&dist->lock); - run->mmio.is_write = is_write; - run->mmio.len = len; - run->mmio.phys_addr = addr; - memcpy(run->mmio.data, val, len); - - kvm_handle_mmio_return(vcpu, run); if (updated_state) vgic_kick_vcpus(vcpu->kvm); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 4f70d12e392d..eddce59986ee 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL); + get_user_pages_unlocked(NULL, mm, addr, 1, NULL, FOLL_WRITE); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b814ae6822b6..08a954582e31 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1352,10 +1352,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(current, current->mm, addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else + } else { + unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; + + if (write_fault) + flags |= FOLL_WRITE; + npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - write_fault, 0, page, - FOLL_TOUCH|FOLL_HWPOISON); + page, flags); + } if (npages != 1) return npages; @@ -2606,6 +2611,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, { struct kvm_device *dev = filp->private_data; + if (dev->kvm->mm != current->mm) + return -EIO; + switch (ioctl) { case KVM_SET_DEVICE_ATTR: return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); @@ -2706,14 +2714,15 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } + kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { + kvm_put_kvm(kvm); ops->destroy(dev); return ret; } list_add(&dev->vm_node, &kvm->devices); - kvm_get_kvm(kvm); cd->fd = ret; return 0; }